use crate::files::llm_output_extraction::cleaning::unescape_json_strings_aggressive;
/// Locates a `<ralph-issues>…</ralph-issues>` block inside raw LLM output.
///
/// Strategies are attempted in a fixed order and the first one that yields a
/// result wins:
///
/// 1. the trimmed input starts directly with the opening tag,
/// 2. the block sits inside a markdown code fence,
/// 3. the block is carried in a string field of a JSON document,
/// 4. the block is embedded anywhere in the surrounding text.
///
/// Returns `None` when none of the strategies finds a block.
#[must_use]
pub fn extract_issues_xml(content: &str) -> Option<String> {
    // `or_else` keeps evaluation lazy: later strategies only run on a miss.
    try_extract_direct_xml(content)
        .or_else(|| try_extract_from_markdown_fence(content))
        .or_else(|| try_extract_from_json_string(content))
        .or_else(|| try_extract_embedded_xml(content))
}
/// Extracts the issues block when the trimmed input begins with `<ralph-issues>`.
///
/// The result spans from the opening tag through the first `</ralph-issues>`;
/// any text after that closing tag is dropped. The extracted text is returned
/// verbatim (no unescaping is applied here).
///
/// Returns `None` when the trimmed input does not start with the opening tag
/// or contains no closing tag.
fn try_extract_direct_xml(content: &str) -> Option<String> {
    const OPEN_TAG: &str = "<ralph-issues>";
    const CLOSE_TAG: &str = "</ralph-issues>";

    let trimmed = content.trim();
    if !trimmed.starts_with(OPEN_TAG) {
        return None;
    }

    // The opening tag is known to sit at offset 0, so there is no need to
    // search for it again, and any closing tag found necessarily follows it.
    let close_at = trimmed.find(CLOSE_TAG)?;
    Some(trimmed[..close_at + CLOSE_TAG.len()].to_string())
}
fn try_extract_from_markdown_fence(content: &str) -> Option<String> {
if let Some(start) = content.find("```xml") {
let after_fence = &content[start + 6..];
if let Some(end) = after_fence.find("```") {
let fence_content = after_fence[..end].trim();
if let Some(xml) = extract_ralph_issues_from_content(fence_content) {
return Some(xml);
}
}
}
if let Some(start) = content.find("```") {
let after_fence = &content[start + 3..];
if let Some(end) = after_fence.find("```") {
let fence_content = after_fence[..end].trim();
if let Some(xml) = extract_ralph_issues_from_content(fence_content) {
return Some(xml);
}
}
}
None
}
/// Looks for the issues block inside string fields of JSON output.
///
/// Two passes are made:
///
/// 1. Line by line: every trimmed line that starts with `{` and parses as
///    JSON is checked for the string fields `result`, `content`, `message`,
///    `output` and `text`, in that order.
/// 2. If no line produced a block, the whole trimmed input is parsed as a
///    single JSON document (only when it starts with `{` and mentions
///    `"result"`), and just its `result` field is checked.
///
/// Each candidate field value is searched as-is first and, failing that,
/// searched again after `unescape_json_strings_aggressive` has been applied.
fn try_extract_from_json_string(content: &str) -> Option<String> {
    const FIELD_NAMES: [&str; 5] = ["result", "content", "message", "output", "text"];

    /// Searches one JSON string value, retrying on its unescaped form.
    fn extract_from_field(value: &str) -> Option<String> {
        if let Some(xml) = extract_ralph_issues_from_content(value) {
            return Some(xml);
        }
        let unescaped = unescape_json_strings_aggressive(value);
        extract_ralph_issues_from_content(&unescaped)
    }

    // Pass 1: newline-delimited JSON, one object per line.
    for line in content.lines().map(str::trim) {
        if !line.starts_with('{') {
            continue;
        }
        let json = match serde_json::from_str::<serde_json::Value>(line) {
            Ok(json) => json,
            Err(_) => continue,
        };
        for field_name in FIELD_NAMES.iter() {
            let hit = json
                .get(*field_name)
                .and_then(|v| v.as_str())
                .and_then(extract_from_field);
            if hit.is_some() {
                return hit;
            }
        }
    }

    // Pass 2: the entire input as one (possibly multi-line) JSON document.
    let whole = content.trim();
    if !whole.starts_with('{') || !whole.contains(r#""result""#) {
        return None;
    }
    let json = serde_json::from_str::<serde_json::Value>(whole).ok()?;
    json.get("result")
        .and_then(|v| v.as_str())
        .and_then(extract_from_field)
}
/// Last-resort strategy: searches the whole input for an embedded issues block.
///
/// Delegates directly to `extract_ralph_issues_from_content`, so the result is
/// whatever that helper finds in `content`, or `None` when it finds nothing.
fn try_extract_embedded_xml(content: &str) -> Option<String> {
extract_ralph_issues_from_content(content)
}
/// Cuts the first `<ralph-issues>…</ralph-issues>` block out of `content`.
///
/// The block starts at the first opening tag and ends at the first closing
/// tag that appears *after* it. A stray `</ralph-issues>` earlier in the text
/// (for example in prose that mentions the tag) therefore no longer prevents
/// a later, complete block from being found.
///
/// The extracted slice is passed through `unescape_json_strings_aggressive`
/// before being returned (that helper is defined elsewhere; presumably it
/// turns JSON escape sequences into literal characters — confirm there).
///
/// Returns `None` when there is no opening tag, or no closing tag after it.
fn extract_ralph_issues_from_content(content: &str) -> Option<String> {
    const OPEN_TAG: &str = "<ralph-issues>";
    const CLOSE_TAG: &str = "</ralph-issues>";

    let start = content.find(OPEN_TAG)?;
    // Search relative to the opening tag so an earlier closing tag is ignored.
    let close_offset = content[start..].find(CLOSE_TAG)?;
    let xml_end = start + close_offset + CLOSE_TAG.len();

    Some(unescape_json_strings_aggressive(&content[start..xml_end]))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Input that is exactly the issues block comes back unchanged.
    #[test]
    fn test_extract_direct_xml_basic() {
        let input = r"<ralph-issues>
<ralph-issue>First issue</ralph-issue>
</ralph-issues>";
        let extracted = extract_issues_xml(input);
        assert_eq!(extracted.as_deref(), Some(input));
    }

    /// A block wrapped in an xml-tagged markdown fence is found.
    #[test]
    fn test_extract_from_xml_fence() {
        let input = r"Here's the issues:
```xml
<ralph-issues>
<ralph-issue>First issue</ralph-issue>
</ralph-issues>
```
Done!";
        let xml = extract_issues_xml(input).expect("fenced block should be extracted");
        assert!(xml.contains("<ralph-issues>"));
    }

    /// A block carried in the `result` field of a single JSON line is found.
    #[test]
    fn test_extract_from_ndjson_result() {
        let input = r#"{"type":"result","result":"<ralph-issues>\n<ralph-issue>First issue</ralph-issue>\n</ralph-issues>"}"#;
        let xml = extract_issues_xml(input).expect("block in `result` should be extracted");
        assert!(xml.contains("<ralph-issues>"));
    }

    /// A block surrounded by free-form prose is found.
    #[test]
    fn test_extract_embedded_in_analysis() {
        let input = r"Based on my review:
<ralph-issues>
<ralph-issue>First issue</ralph-issue>
</ralph-issues>
That's all!";
        assert!(extract_issues_xml(input).is_some());
    }

    /// Text without any issues tags yields `None`.
    #[test]
    fn test_extract_no_xml_returns_none() {
        let input = r"This is just plain text without any XML tags.";
        assert_eq!(extract_issues_xml(input), None);
    }

    /// A block carried in the `message` field of a JSON line is found.
    #[test]
    fn test_extract_from_json_message_field() {
        let input = r#"{"type":"event","message":"<ralph-issues>\n<ralph-issue>Issue from message</ralph-issue>\n</ralph-issues>"}"#;
        let xml = extract_issues_xml(input).expect("block in `message` should be extracted");
        assert!(xml.contains("<ralph-issues>"));
        assert!(xml.contains("Issue from message"));
    }
}