ralph_workflow/files/llm_output_extraction/
xsd_validation_issues.rs

//! XSD validation for issues XML format.
//!
//! This module validates XML output against the structure defined by the
//! issues XSD schema, ensuring that AI agent output conforms to the expected
//! format for review issues.
5
6use crate::files::llm_output_extraction::xsd_validation::XsdValidationError;
7
8/// Validate issues XML content against the XSD schema.
9///
10/// This function validates that the XML content conforms to the expected
11/// issues format defined in issues.xsd:
12///
13/// ```xml
14/// <ralph-issues>
15///   <ralph-issue>First issue description</ralph-issue>
16///   <ralph-issue>Second issue description</ralph-issue>
17///   ...
18/// </ralph-issues>
19/// ```
20///
21/// OR for no issues:
22///
23/// ```xml
24/// <ralph-issues>
25///   <ralph-no-issues-found>No issues were found during review</ralph-no-issues-found>
26/// </ralph-issues>
27/// ```
28///
29/// # Arguments
30///
31/// * `xml_content` - The XML content to validate
32///
33/// # Returns
34///
35/// * `Ok(IssuesElements)` if the XML is valid and contains all required elements
36/// * `Err(XsdValidationError)` if the XML is invalid or doesn't conform to the schema
37pub fn validate_issues_xml(xml_content: &str) -> Result<IssuesElements, XsdValidationError> {
38    let content = xml_content.trim();
39
40    // Check for XML declaration (optional, so we skip it if present)
41    let content = if content.starts_with("<?xml") {
42        if let Some(end) = content.find("?>") {
43            &content[end + 2..]
44        } else {
45            return Err(XsdValidationError {
46                error_type:
47                    crate::files::llm_output_extraction::xsd_validation::XsdErrorType::MalformedXml,
48                element_path: "xml".to_string(),
49                expected: "valid XML declaration ending with ?>".to_string(),
50                found: "unclosed XML declaration".to_string(),
51                suggestion: "Ensure XML declaration is properly closed with ?>".to_string(),
52            });
53        }
54    } else {
55        content
56    };
57
58    let content = content.trim();
59
60    // Check for <ralph-issues> root element
61    if !content.starts_with("<ralph-issues>") {
62        return Err(XsdValidationError {
63            error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::MissingRequiredElement,
64            element_path: "ralph-issues".to_string(),
65            expected: "<ralph-issues> as root element".to_string(),
66            found: if content.is_empty() {
67                "empty content".to_string()
68            } else if content.len() < 50 {
69                content.to_string()
70            } else {
71                format!("{}...", &content[..50])
72            },
73            suggestion: "Wrap your issues in <ralph-issues> tags".to_string(),
74        });
75    }
76
77    if !content.ends_with("</ralph-issues>") {
78        return Err(XsdValidationError {
79            error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::MissingRequiredElement,
80            element_path: "ralph-issues".to_string(),
81            expected: "closing </ralph-issues> tag".to_string(),
82            found: "missing closing tag".to_string(),
83            suggestion: "Add </ralph-issues> at the end of your issues".to_string(),
84        });
85    }
86
87    // Extract content between root tags
88    let root_start = "<ralph-issues>".len();
89    let root_end = content.len() - "</ralph-issues>".len();
90    let issues_content = &content[root_start..root_end];
91
92    // Parse elements
93    let mut issues = Vec::new();
94    let mut no_issues_found = None;
95
96    // Parse elements in order
97    let mut remaining = issues_content.trim();
98
99    while !remaining.is_empty() {
100        // Try to parse ralph-issue elements
101        if let Some(tag_content) = extract_tag_content(remaining, "ralph-issue") {
102            // Cannot mix issues and no-issues-found
103            if no_issues_found.is_some() {
104                return Err(XsdValidationError {
105                    error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::UnexpectedElement,
106                    element_path: "ralph-issue".to_string(),
107                    expected: "either <ralph-issue> elements OR <ralph-no-issues-found>, not both".to_string(),
108                    found: "mixed issues and no-issues-found".to_string(),
109                    suggestion: "Use either <ralph-issue> elements when issues exist, or <ralph-no-issues-found> when no issues exist, not both".to_string(),
110                });
111            }
112            issues.push(tag_content);
113            remaining = advance_past_tag(remaining, "ralph-issue");
114            continue;
115        }
116
117        // Try to parse ralph-no-issues-found element
118        if let Some(tag_content) = extract_tag_content(remaining, "ralph-no-issues-found") {
119            // Cannot mix issues and no-issues-found
120            if !issues.is_empty() {
121                return Err(XsdValidationError {
122                    error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::UnexpectedElement,
123                    element_path: "ralph-no-issues-found".to_string(),
124                    expected: "either <ralph-issue> elements OR <ralph-no-issues-found>, not both".to_string(),
125                    found: "mixed issues and no-issues-found".to_string(),
126                    suggestion: "Use either <ralph-issue> elements when issues exist, or <ralph-no-issues-found> when no issues exist, not both".to_string(),
127                });
128            }
129            if no_issues_found.is_some() {
130                return Err(XsdValidationError {
131                    error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::UnexpectedElement,
132                    element_path: "ralph-no-issues-found".to_string(),
133                    expected: "only one <ralph-no-issues-found> element".to_string(),
134                    found: "duplicate <ralph-no-issues-found> element".to_string(),
135                    suggestion: "Include only one <ralph-no-issues-found> element".to_string(),
136                });
137            }
138            no_issues_found = Some(tag_content);
139            remaining = advance_past_tag(remaining, "ralph-no-issues-found");
140            continue;
141        }
142
143        // If we get here, there's unexpected content
144        let first_fifty = if remaining.len() > 50 {
145            format!("{}...", &remaining[..50])
146        } else {
147            remaining.to_string()
148        };
149
150        // Try to identify what the unexpected content is
151        if remaining.starts_with('<') {
152            if let Some(tag_end) = remaining.find('>') {
153                let potential_tag = &remaining[..tag_end + 1];
154                return Err(XsdValidationError {
155                    error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::UnexpectedElement,
156                    element_path: potential_tag.to_string(),
157                    expected: "only valid issues tags".to_string(),
158                    found: format!("unexpected tag: {potential_tag}"),
159                    suggestion: "Remove the unexpected tag. Valid tags are: <ralph-issue>, <ralph-no-issues-found>".to_string(),
160                });
161            }
162        }
163
164        return Err(XsdValidationError {
165            error_type:
166                crate::files::llm_output_extraction::xsd_validation::XsdErrorType::InvalidContent,
167            element_path: "content".to_string(),
168            expected: "only XML tags".to_string(),
169            found: first_fifty,
170            suggestion:
171                "Remove any text outside of XML tags. All content must be within appropriate tags."
172                    .to_string(),
173        });
174    }
175
176    // Must have either issues or no-issues-found
177    // Note: We check after filtering because whitespace-only issues should be treated as empty
178    let filtered_issues: Vec<String> = issues
179        .into_iter()
180        .map(|s| s.trim().to_string())
181        .filter(|s| !s.is_empty())
182        .collect();
183    let filtered_no_issues = no_issues_found
184        .map(|s| s.trim().to_string())
185        .filter(|s| !s.is_empty());
186
187    if filtered_issues.is_empty() && filtered_no_issues.is_none() {
188        return Err(XsdValidationError {
189            error_type: crate::files::llm_output_extraction::xsd_validation::XsdErrorType::MissingRequiredElement,
190            element_path: "ralph-issues".to_string(),
191            expected: "expected at least one <ralph-issue> element OR <ralph-no-issues-found>".to_string(),
192            found: "no issues or no-issues-found element".to_string(),
193            suggestion: "Add either <ralph-issue> elements for issues found, or <ralph-no-issues-found> if no issues exist".to_string(),
194        });
195    }
196
197    Ok(IssuesElements {
198        issues: filtered_issues,
199        no_issues_found: filtered_no_issues,
200    })
201}
202
/// Return the text enclosed by the first `<tag_name>…</tag_name>` pair.
///
/// Leading whitespace in `content` is ignored. Yields `None` when `content`
/// does not begin (after that whitespace) with the opening tag, or when no
/// closing tag follows it. The text is returned as-is, without trimming.
fn extract_tag_content(content: &str, tag_name: &str) -> Option<String> {
    let opening = format!("<{tag_name}>");
    let closing = format!("</{tag_name}>");

    // Everything after the opening tag, or bail out if it is not there.
    let after_open = content.trim_start().strip_prefix(opening.as_str())?;

    // Keep only what precedes the first closing tag.
    let (inner, _rest) = after_open.split_once(closing.as_str())?;
    Some(inner.to_owned())
}
220
/// Skip past the first `</tag_name>` in `content`.
///
/// Returns whatever follows that closing tag with leading whitespace removed.
/// When no closing tag is present, an empty slice at the end of `content` is
/// returned. The opening tag is not checked here.
fn advance_past_tag<'a>(content: &'a str, tag_name: &str) -> &'a str {
    let closing = format!("</{tag_name}>");
    let exhausted = &content[content.len()..];

    content
        .trim_start()
        .split_once(closing.as_str())
        .map_or(exhausted, |(_, tail)| tail.trim_start())
}
233
/// Elements extracted from an issues document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IssuesElements {
    /// Text of each reported issue; empty when none were reported.
    pub issues: Vec<String>,
    /// The "no issues found" message, when one was supplied instead of issues.
    pub no_issues_found: Option<String>,
}

impl IssuesElements {
    /// Returns `true` when the issue list is empty *and* a no-issues-found
    /// message is present; `false` in every other case.
    #[cfg(any(test, feature = "test-utils"))]
    pub fn is_empty(&self) -> bool {
        matches!(
            (self.issues.as_slice(), self.no_issues_found.as_ref()),
            ([], Some(_))
        )
    }

    /// Returns how many entries `issues` holds.
    #[cfg(any(test, feature = "test-utils"))]
    pub fn issue_count(&self) -> usize {
        let Self { issues, .. } = self;
        issues.len()
    }
}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// A single `<ralph-issue>` is accepted and its text returned.
    #[test]
    fn test_validate_valid_single_issue() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue description</ralph-issue>
</ralph-issues>"#;

        let elements = validate_issues_xml(xml).expect("a single issue should validate");

        assert_eq!(elements.issues, ["First issue description"]);
        assert_eq!(elements.no_issues_found, None);
    }

    /// Several `<ralph-issue>` elements are all collected.
    #[test]
    fn test_validate_valid_multiple_issues() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue</ralph-issue>
<ralph-issue>Second issue</ralph-issue>
<ralph-issue>Third issue</ralph-issue>
</ralph-issues>"#;

        let elements = validate_issues_xml(xml).expect("multiple issues should validate");

        assert_eq!(elements.issues.len(), 3);
        assert_eq!(elements.issue_count(), 3);
    }

    /// A lone `<ralph-no-issues-found>` is accepted and yields no issues.
    #[test]
    fn test_validate_valid_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>No issues were found during review</ralph-no-issues-found>
</ralph-issues>"#;

        let elements = validate_issues_xml(xml).expect("no-issues-found should validate");

        assert!(elements.issues.is_empty());
        assert!(elements.no_issues_found.is_some());
        assert!(elements.is_empty());
    }

    /// Content without the root element is rejected, naming the root.
    #[test]
    fn test_validate_missing_root_element() {
        let xml = r#"Some random text without proper XML tags"#;

        let error = validate_issues_xml(xml).expect_err("missing root must be rejected");

        assert_eq!(error.element_path, "ralph-issues");
    }

    /// An empty root is rejected because at least one child is required.
    #[test]
    fn test_validate_empty_issues() {
        let xml = r#"<ralph-issues>
</ralph-issues>"#;

        let error = validate_issues_xml(xml).expect_err("empty root must be rejected");

        assert!(error.expected.contains("at least one"));
    }

    /// Issues and no-issues-found may not appear together.
    #[test]
    fn test_validate_mixed_issues_and_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-issue>First issue</ralph-issue>
<ralph-no-issues-found>No issues</ralph-no-issues-found>
</ralph-issues>"#;

        let error = validate_issues_xml(xml).expect_err("mixed elements must be rejected");

        assert!(error.suggestion.contains("not both"));
    }

    /// A second `<ralph-no-issues-found>` is rejected.
    #[test]
    fn test_validate_duplicate_no_issues_found() {
        let xml = r#"<ralph-issues>
<ralph-no-issues-found>No issues</ralph-no-issues-found>
<ralph-no-issues-found>Also no issues</ralph-no-issues-found>
</ralph-issues>"#;

        assert!(validate_issues_xml(xml).is_err());
    }
}
348}