ralph_workflow/files/llm_output_extraction/
xsd_validation_issues.rs1use crate::files::llm_output_extraction::xml_helpers::{
9 create_reader, duplicate_element_error, format_content_preview, malformed_xml_error,
10 read_text_until_end, skip_to_end, text_outside_tags_error, unexpected_element_error,
11};
12use crate::files::llm_output_extraction::xsd_validation::{XsdErrorType, XsdValidationError};
13use quick_xml::events::Event;
14
/// Sample `<ralph-issues>` document containing two `<ralph-issue>` entries.
///
/// Attached as the `example` of validation errors so the reader can see a
/// well-formed document.
const EXAMPLE_ISSUES_XML: &str = concat!(
    "<ralph-issues>\n",
    "<ralph-issue>Missing error handling in API endpoint</ralph-issue>\n",
    "<ralph-issue>Variable shadowing in loop construct</ralph-issue>\n",
    "</ralph-issues>",
);
20
/// Sample `<ralph-issues>` document that reports no issues via a single
/// `<ralph-no-issues-found>` element.
///
/// Attached as the `example` of the error raised when that element is mixed
/// with `<ralph-issue>` entries.
const EXAMPLE_NO_ISSUES_XML: &str = concat!(
    "<ralph-issues>\n",
    "<ralph-no-issues-found>No issues were found during review</ralph-no-issues-found>\n",
    "</ralph-issues>",
);
25
26pub fn validate_issues_xml(xml_content: &str) -> Result<IssuesElements, XsdValidationError> {
56 let content = xml_content.trim();
57 let mut reader = create_reader(content);
58 let mut buf = Vec::new();
59
60 loop {
62 match reader.read_event_into(&mut buf) {
63 Ok(Event::Start(e)) if e.name().as_ref() == b"ralph-issues" => break,
64 Ok(Event::Start(e)) => {
65 let name_bytes = e.name();
66 let tag_name = String::from_utf8_lossy(name_bytes.as_ref());
67 return Err(XsdValidationError {
68 error_type: XsdErrorType::MissingRequiredElement,
69 element_path: "ralph-issues".to_string(),
70 expected: "<ralph-issues> as root element".to_string(),
71 found: format!("<{}> (wrong root element)", tag_name),
72 suggestion: "Use <ralph-issues> as the root element.".to_string(),
73 example: Some(EXAMPLE_ISSUES_XML.into()),
74 });
75 }
76 Ok(Event::Text(_)) => {
77 }
79 Ok(Event::Eof) => {
80 return Err(XsdValidationError {
81 error_type: XsdErrorType::MissingRequiredElement,
82 element_path: "ralph-issues".to_string(),
83 expected: "<ralph-issues> as root element".to_string(),
84 found: format_content_preview(content),
85 suggestion: "Wrap your issues in <ralph-issues>...</ralph-issues> tags."
86 .to_string(),
87 example: Some(EXAMPLE_ISSUES_XML.into()),
88 });
89 }
90 Ok(_) => {} Err(e) => return Err(malformed_xml_error(e)),
92 }
93 buf.clear();
94 }
95
96 let mut issues: Vec<String> = Vec::new();
98 let mut no_issues_found: Option<String> = None;
99
100 const VALID_TAGS: [&str; 2] = ["ralph-issue", "ralph-no-issues-found"];
101
102 loop {
103 buf.clear();
104 match reader.read_event_into(&mut buf) {
105 Ok(Event::Start(e)) => {
106 match e.name().as_ref() {
107 b"ralph-issue" => {
108 if no_issues_found.is_some() {
110 return Err(XsdValidationError {
111 error_type: XsdErrorType::UnexpectedElement,
112 element_path: "ralph-issues/ralph-issue".to_string(),
113 expected: "either <ralph-issue> elements OR <ralph-no-issues-found>, not both".to_string(),
114 found: "mixed issues and no-issues-found".to_string(),
115 suggestion: "Use <ralph-issue> when issues exist, or <ralph-no-issues-found> when no issues exist.".to_string(),
116 example: Some(EXAMPLE_ISSUES_XML.into()),
117 });
118 }
119 let issue_text = read_text_until_end(&mut reader, b"ralph-issue")?;
120 issues.push(issue_text);
121 }
122 b"ralph-no-issues-found" => {
123 if !issues.is_empty() {
125 return Err(XsdValidationError {
126 error_type: XsdErrorType::UnexpectedElement,
127 element_path: "ralph-issues/ralph-no-issues-found".to_string(),
128 expected: "either <ralph-issue> elements OR <ralph-no-issues-found>, not both".to_string(),
129 found: "mixed issues and no-issues-found".to_string(),
130 suggestion: "Use <ralph-issue> when issues exist, or <ralph-no-issues-found> when no issues exist.".to_string(),
131 example: Some(EXAMPLE_NO_ISSUES_XML.into()),
132 });
133 }
134 if no_issues_found.is_some() {
135 return Err(duplicate_element_error(
136 "ralph-no-issues-found",
137 "ralph-issues",
138 ));
139 }
140 no_issues_found =
141 Some(read_text_until_end(&mut reader, b"ralph-no-issues-found")?);
142 }
143 other => {
144 let _ = skip_to_end(&mut reader, other);
145 return Err(unexpected_element_error(other, &VALID_TAGS, "ralph-issues"));
146 }
147 }
148 }
149 Ok(Event::Text(e)) => {
150 let text = e.unescape().unwrap_or_default();
151 let trimmed = text.trim();
152 if !trimmed.is_empty() {
153 return Err(text_outside_tags_error(trimmed, "ralph-issues"));
154 }
155 }
156 Ok(Event::End(e)) if e.name().as_ref() == b"ralph-issues" => break,
157 Ok(Event::Eof) => {
158 return Err(XsdValidationError {
159 error_type: XsdErrorType::MalformedXml,
160 element_path: "ralph-issues".to_string(),
161 expected: "closing </ralph-issues> tag".to_string(),
162 found: "end of content without closing tag".to_string(),
163 suggestion: "Add </ralph-issues> at the end.".to_string(),
164 example: Some(EXAMPLE_ISSUES_XML.into()),
165 });
166 }
167 Ok(_) => {} Err(e) => return Err(malformed_xml_error(e)),
169 }
170 }
171
172 let filtered_issues: Vec<String> = issues.into_iter().filter(|s| !s.is_empty()).collect();
174 let filtered_no_issues = no_issues_found.filter(|s| !s.is_empty());
175
176 if filtered_issues.is_empty() && filtered_no_issues.is_none() {
178 return Err(XsdValidationError {
179 error_type: XsdErrorType::MissingRequiredElement,
180 element_path: "ralph-issues".to_string(),
181 expected: "at least one <ralph-issue> element OR <ralph-no-issues-found>".to_string(),
182 found: "empty <ralph-issues> element".to_string(),
183 suggestion:
184 "Add <ralph-issue> elements for issues found, or <ralph-no-issues-found> if no issues exist."
185 .to_string(),
186 example: Some(EXAMPLE_ISSUES_XML.into()),
187 });
188 }
189
190 Ok(IssuesElements {
191 issues: filtered_issues,
192 no_issues_found: filtered_no_issues,
193 })
194}
195
/// Elements extracted from a `<ralph-issues>` document.
///
/// `validate_issues_xml` only returns values in which at least one of the two
/// fields is populated, but the fields are public, so hand-built values may
/// leave both empty.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IssuesElements {
    /// Text of each `<ralph-issue>` element, in document order.
    pub issues: Vec<String>,
    /// Text of the `<ralph-no-issues-found>` element, if one was present.
    pub no_issues_found: Option<String>,
}

impl IssuesElements {
    /// Returns `true` when no issues are recorded.
    ///
    /// Only the issue list is consulted, so the answer does not depend on
    /// whether a `<ralph-no-issues-found>` message accompanies the empty list.
    #[cfg(any(test, feature = "test-utils"))]
    pub fn is_empty(&self) -> bool {
        self.issues.is_empty()
    }

    /// Returns the number of recorded issues.
    #[cfg(any(test, feature = "test-utils"))]
    pub fn issue_count(&self) -> usize {
        self.issues.len()
    }
}
218
/// Unit tests for `validate_issues_xml` and the `IssuesElements` accessors.
///
/// Inputs that are expected to parse use XML entities (`&lt;`, `&gt;`,
/// `&amp;`) for special characters inside element text. The
/// `test_llm_mistake_*` cases deliberately leave them raw and expect rejection.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_validate_valid_single_issue() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue description</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert_eq!(elements.issues.len(), 1);
        assert_eq!(elements.issues[0], "First issue description");
        assert!(elements.no_issues_found.is_none());
    }

    #[test]
    fn test_validate_valid_multiple_issues() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue</ralph-issue>
<ralph-issue>Second issue</ralph-issue>
<ralph-issue>Third issue</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert_eq!(elements.issues.len(), 3);
        assert_eq!(elements.issue_count(), 3);
    }

    #[test]
    fn test_validate_valid_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>No issues were found during review</ralph-no-issues-found>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert!(elements.issues.is_empty());
        assert!(elements.no_issues_found.is_some());
        assert!(elements.is_empty());
    }

    #[test]
    fn test_validate_missing_root_element() {
        let xml = r#"Some random text without proper XML tags"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_err());
        let error = result.unwrap_err();
        assert_eq!(error.element_path, "ralph-issues");
    }

    #[test]
    fn test_validate_empty_issues() {
        let xml = r#"<ralph-issues>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_err());
        let error = result.unwrap_err();
        assert!(error.expected.contains("at least one"));
    }

    #[test]
    fn test_validate_mixed_issues_and_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue</ralph-issue>
<ralph-no-issues-found>No issues</ralph-no-issues-found>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_err());
        let error = result.unwrap_err();
        assert!(error.suggestion.contains("not both") || error.expected.contains("not both"));
    }

    #[test]
    fn test_validate_duplicate_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>No issues</ralph-no-issues-found>
<ralph-no-issues-found>Also no issues</ralph-no-issues-found>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_whitespace_handling() {
        // Whitespace around the document and between elements must be tolerated.
        let xml =
            "  <ralph-issues>  \n  <ralph-issue>Issue text</ralph-issue>  \n  </ralph-issues>  ";

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_with_xml_declaration() {
        let xml = r#"<?xml version="1.0"?>
<ralph-issues>
<ralph-issue>Issue text</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_issue_with_code_element() {
        // A nested <code> element is allowed; its (escaped) text is expected in the issue.
        let xml = r#"<ralph-issues>
<ralph-issue>Check if <code>a &lt; b</code> is valid</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert_eq!(elements.issues.len(), 1);
        assert!(elements.issues[0].contains("Check if"));
        assert!(elements.issues[0].contains("a < b"));
        assert!(elements.issues[0].contains("is valid"));
    }

    #[test]
    fn test_validate_no_issues_with_code_element() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>All <code>Record&lt;string, T&gt;</code> types are correct</ralph-no-issues-found>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert!(elements.no_issues_found.is_some());
        let msg = elements.no_issues_found.unwrap();
        assert!(msg.contains("Record<string, T>"));
    }

    #[test]
    fn test_llm_realistic_issue_with_generic_type_escaped() {
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/parser.rs:42 - The function <code>parse&lt;T&gt;</code> does not handle empty input.
Suggested fix: Add a check for empty input before parsing.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok(), "Should parse escaped generic: {:?}", result);
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("parse<T>"));
    }

    #[test]
    fn test_llm_realistic_issue_with_comparison_escaped() {
        let xml = r#"<ralph-issues>
<ralph-issue>[Medium] src/validate.rs:15 - The condition <code>count &lt; 0</code> should be <code>count &lt;= 0</code>.
Suggested fix: Change the comparison operator.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_ok(),
            "Should parse escaped comparisons: {:?}",
            result
        );
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("count < 0"));
        assert!(elements.issues[0].contains("count <= 0"));
    }

    #[test]
    fn test_llm_realistic_issue_with_logical_operators_escaped() {
        let xml = r#"<ralph-issues>
<ralph-issue>[Low] src/filter.rs:88 - The expression <code>a &amp;&amp; b || c</code> has ambiguous precedence.
Suggested fix: Add explicit parentheses.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_ok(),
            "Should parse escaped logical operators: {:?}",
            result
        );
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("a && b || c"));
    }

    #[test]
    fn test_llm_realistic_issue_with_rust_lifetime() {
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/buffer.rs:23 - The lifetime <code>&amp;'a str</code> should match the struct lifetime.
Suggested fix: Ensure lifetime annotations are consistent.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok(), "Should parse lifetime syntax: {:?}", result);
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("&'a str"));
    }

    #[test]
    fn test_llm_realistic_issue_with_html_in_description() {
        let xml = r#"<ralph-issues>
<ralph-issue>[Medium] src/template.rs:56 - The HTML template uses <code>&lt;div class="container"&gt;</code> but should use semantic tags.
Suggested fix: Replace with appropriate semantic HTML elements.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok(), "Should parse HTML in code: {:?}", result);
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("<div class=\"container\">"));
    }

    #[test]
    fn test_llm_realistic_no_issues_with_detailed_explanation() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>The implementation correctly handles all edge cases:
- Input validation properly rejects values where <code>x &lt; 0</code>
- The generic <code>Result&lt;T, E&gt;</code> type is used consistently
- Error handling follows the project's established patterns
No issues require attention.</ralph-no-issues-found>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_ok(),
            "Should parse detailed no-issues: {:?}",
            result
        );
        let elements = result.unwrap();
        let msg = elements.no_issues_found.unwrap();
        assert!(msg.contains("x < 0"));
        assert!(msg.contains("Result<T, E>"));
    }

    #[test]
    fn test_llm_realistic_multiple_issues_with_mixed_content() {
        let xml = r#"<ralph-issues>
<ralph-issue>[Critical] src/auth.rs:12 - SQL injection vulnerability: user input in <code>query &amp;&amp; filter</code> is not sanitized.</ralph-issue>
<ralph-issue>[High] src/api.rs:45 - Missing null check: <code>response.data</code> may be undefined when <code>status &lt; 200</code>.</ralph-issue>
<ralph-issue>[Medium] src/utils.rs:78 - The type <code>Option&lt;Vec&lt;T&gt;&gt;</code> could be simplified to <code>Vec&lt;T&gt;</code> with empty default.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_ok(),
            "Should parse multiple issues with mixed content: {:?}",
            result
        );
        let elements = result.unwrap();
        assert_eq!(elements.issues.len(), 3);
        assert!(elements.issues[0].contains("query && filter"));
        assert!(elements.issues[1].contains("status < 200"));
        assert!(elements.issues[2].contains("Option<Vec<T>>"));
    }

    #[test]
    fn test_llm_mistake_unescaped_less_than_fails() {
        // The raw `<` is intentional: this input must be rejected.
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/compare.rs:10 - The condition a < b is wrong.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_err(),
            "Unescaped < should fail XML parsing: {:?}",
            result
        );
    }

    #[test]
    fn test_llm_mistake_unescaped_generic_fails() {
        // The raw `<String>` is intentional: this input must be rejected.
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/types.rs:5 - The type Vec<String> is incorrect.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_err(),
            "Unescaped generic should fail XML parsing: {:?}",
            result
        );
    }

    #[test]
    fn test_llm_mistake_unescaped_ampersand_fails() {
        // The raw `&&` is intentional: this input must be rejected.
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/logic.rs:20 - The expression a && b is wrong.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(
            result.is_err(),
            "Unescaped && should fail XML parsing: {:?}",
            result
        );
    }

    #[test]
    fn test_llm_uses_cdata_for_code_content() {
        // Inside CDATA the special characters need no escaping.
        let xml = r#"<ralph-issues>
<ralph-issue>[High] src/cmp.rs:10 - The condition <code><![CDATA[a < b && c > d]]></code> has issues.</ralph-issue>
</ralph-issues>"#;

        let result = validate_issues_xml(xml);
        assert!(result.is_ok(), "CDATA should be valid: {:?}", result);
        let elements = result.unwrap();
        assert!(elements.issues[0].contains("a < b && c > d"));
    }
}