use crate::files::llm_output_extraction::cleaning::unescape_json_strings_aggressive;
/// Extracts a `<ralph-fix-result>...</ralph-fix-result>` document from raw output text.
///
/// Strategies are attempted in a fixed order and the first one that yields a
/// value wins:
///
/// 1. the trimmed content starts directly with the opening tag,
/// 2. the XML sits inside a Markdown code fence,
/// 3. the XML is carried in a string field of a JSON / NDJSON payload,
/// 4. the XML is embedded somewhere in surrounding text.
///
/// Returns `None` when none of the strategies finds a tag pair.
#[must_use]
pub fn extract_fix_result_xml(content: &str) -> Option<String> {
    // `or_else` keeps the evaluation lazy: a later strategy only runs when
    // every earlier one came back empty.
    try_extract_direct_xml(content)
        .or_else(|| try_extract_from_markdown_fence(content))
        .or_else(|| try_extract_from_json_string(content))
        .or_else(|| try_extract_embedded_xml(content))
}
/// Handles the case where the trimmed content begins with the opening tag.
///
/// Returns the text from the opening tag through the first closing tag,
/// copied verbatim (no unescaping is applied on this path). Anything after
/// the closing tag is discarded. Returns `None` when the trimmed content does
/// not start with `<ralph-fix-result>` or contains no closing tag.
fn try_extract_direct_xml(content: &str) -> Option<String> {
    const OPEN_TAG: &str = "<ralph-fix-result>";
    const CLOSE_TAG: &str = "</ralph-fix-result>";

    let body = content.trim();
    if !body.starts_with(OPEN_TAG) {
        return None;
    }

    // The opening tag sits at offset 0, so the first closing tag found is
    // necessarily located after it.
    let close_at = body.find(CLOSE_TAG)?;
    Some(body[..close_at + CLOSE_TAG.len()].to_string())
}
fn try_extract_from_markdown_fence(content: &str) -> Option<String> {
if let Some(start) = content.find("```xml") {
let after_fence = &content[start + 6..];
if let Some(end) = after_fence.find("```") {
let fence_content = after_fence[..end].trim();
if let Some(xml) = extract_ralph_fix_result_from_content(fence_content) {
return Some(xml);
}
}
}
if let Some(start) = content.find("```") {
let after_fence = &content[start + 3..];
if let Some(end) = after_fence.find("```") {
let fence_content = after_fence[..end].trim();
if let Some(xml) = extract_ralph_fix_result_from_content(fence_content) {
return Some(xml);
}
}
}
None
}
/// Looks for the result XML inside string fields of JSON output.
///
/// Two shapes are handled, in this order:
///
/// 1. **NDJSON** - every line that starts with `{` and parses as JSON is
///    inspected; the string fields `result`, `content`, `message`, `output`
///    and `text` are checked in that order and the first hit is returned.
/// 2. **Single JSON document** - when the whole trimmed content starts with
///    `{`, mentions `"result"` and parses as JSON, only its `result` string
///    field is checked.
///
/// Each candidate field value is scanned as-is first and, if that finds
/// nothing, scanned again after `unescape_json_strings_aggressive`.
fn try_extract_from_json_string(content: &str) -> Option<String> {
    const CANDIDATE_FIELDS: [&str; 5] = ["result", "content", "message", "output", "text"];

    /// Scans one field value, retrying on its unescaped form if needed.
    fn xml_from_field(raw: &str) -> Option<String> {
        if let Some(xml) = extract_ralph_fix_result_from_content(raw) {
            return Some(xml);
        }
        extract_ralph_fix_result_from_content(&unescape_json_strings_aggressive(raw))
    }

    // Pass 1: treat the input as newline-delimited JSON records.
    for line in content.lines().map(str::trim) {
        if !line.starts_with('{') {
            continue;
        }
        let record = match serde_json::from_str::<serde_json::Value>(line) {
            Ok(value) => value,
            // Lines that merely look like JSON are skipped, not fatal.
            Err(_) => continue,
        };
        for &field in &CANDIDATE_FIELDS {
            let hit = record
                .get(field)
                .and_then(serde_json::Value::as_str)
                .and_then(xml_from_field);
            if hit.is_some() {
                return hit;
            }
        }
    }

    // Pass 2: treat the whole input as one (possibly multi-line) JSON object.
    let whole = content.trim();
    if !(whole.starts_with('{') && whole.contains(r#""result""#)) {
        return None;
    }
    let document = serde_json::from_str::<serde_json::Value>(whole).ok()?;
    let result_text = document.get("result")?.as_str()?;
    xml_from_field(result_text)
}
/// Looks for a `<ralph-fix-result>` tag pair anywhere in `content`.
///
/// This is the final strategy tried by `extract_fix_result_xml`; it simply
/// delegates to `extract_ralph_fix_result_from_content` on the whole input.
fn try_extract_embedded_xml(content: &str) -> Option<String> {
extract_ralph_fix_result_from_content(content)
}
/// Finds the first `<ralph-fix-result>...</ralph-fix-result>` span in `content`.
///
/// The span starts at the first opening tag and ends at the first closing tag
/// that appears *after* that opening tag. The extracted slice is passed
/// through `unescape_json_strings_aggressive` before being returned.
///
/// Returns `None` when there is no opening tag, or no closing tag following it.
fn extract_ralph_fix_result_from_content(content: &str) -> Option<String> {
    const OPEN_TAG: &str = "<ralph-fix-result>";
    const CLOSE_TAG: &str = "</ralph-fix-result>";

    let start = content.find(OPEN_TAG)?;

    // Search for the closing tag only in the text that follows the opening
    // tag. Searching from the beginning of `content` would latch onto a stray
    // closing tag that precedes the real document (e.g. one mentioned in
    // explanatory prose) and wrongly reject an otherwise valid tag pair.
    let body_start = start + OPEN_TAG.len();
    let close_offset = content[body_start..].find(CLOSE_TAG)?;
    let xml_end = body_start + close_offset + CLOSE_TAG.len();

    Some(unescape_json_strings_aggressive(&content[start..xml_end]))
}
// Unit tests for `extract_fix_result_xml`, one per input shape the extractor
// accepts, plus a negative case.
#[cfg(test)]
mod tests {
use super::*;
// Input that starts with the opening tag is returned verbatim.
#[test]
fn test_extract_direct_xml_basic() {
let content = r"<ralph-fix-result>
<ralph-status>all_issues_addressed</ralph-status>
</ralph-fix-result>";
let result = extract_fix_result_xml(content);
assert!(result.is_some());
assert_eq!(result.unwrap(), content);
}
// XML wrapped in a ```xml Markdown fence with prose before and after it.
#[test]
fn test_extract_from_xml_fence() {
let content = r"Here's the result:
```xml
<ralph-fix-result>
<ralph-status>all_issues_addressed</ralph-status>
</ralph-fix-result>
```
Done!";
let result = extract_fix_result_xml(content);
assert!(result.is_some());
assert!(result.unwrap().contains("<ralph-fix-result>"));
}
// Single-line JSON record whose `result` string field carries the XML with
// `\n` escape sequences.
#[test]
fn test_extract_from_ndjson_result() {
let content = r#"{"type":"result","result":"<ralph-fix-result>\n<ralph-status>all_issues_addressed</ralph-status>\n</ralph-fix-result>"}"#;
let result = extract_fix_result_xml(content);
assert!(result.is_some());
assert!(result.unwrap().contains("<ralph-fix-result>"));
}
// XML surrounded by plain prose: no fence and no JSON wrapper.
#[test]
fn test_extract_embedded_in_analysis() {
let content = r"Based on my fixes:
<ralph-fix-result>
<ralph-status>all_issues_addressed</ralph-status>
</ralph-fix-result>
That's all!";
let result = extract_fix_result_xml(content);
assert!(result.is_some());
}
// Text without any result tags yields `None`.
#[test]
fn test_extract_no_xml_returns_none() {
let content = r"This is just plain text without any XML tags.";
let result = extract_fix_result_xml(content);
assert!(result.is_none());
}
// JSON record that carries the XML in its `content` field rather than `result`.
#[test]
fn test_extract_from_json_content_field() {
let content = r#"{"type":"metadata","content":"<ralph-fix-result><ralph-status>all_issues_addressed</ralph-status></ralph-fix-result>"}"#;
let result = extract_fix_result_xml(content);
assert!(result.is_some());
assert!(result
.unwrap()
.contains("<ralph-status>all_issues_addressed</ralph-status>"));
}
}