use crate::files::llm_output_extraction::cleaning::unescape_json_strings_aggressive;
/// One way of locating plan XML inside raw output text.
///
/// `extract_plan_xml` stores implementors as `&dyn XmlExtractionStrategy` and
/// tries them in a fixed order, returning the first `Some` result.
trait XmlExtractionStrategy {
/// Attempts to extract the plan from `content`.
///
/// Returns the extracted text, or `None` when this strategy finds nothing.
fn extract(&self, content: &str) -> Option<String>;
}
/// Extracts the first `<ralph-plan>…</ralph-plan>` element found in `content`.
///
/// The extracted slice runs from the opening tag through the closing tag (both
/// inclusive) and is passed through `unescape_json_strings_aggressive` before
/// being returned.
///
/// The closing tag is searched for only *after* the opening tag. A stray
/// `</ralph-plan>` earlier in the text (for example prose that mentions the
/// tag) therefore does not hide a well-formed plan that follows it.
///
/// Returns `None` when there is no opening tag, or no closing tag after it.
fn extract_plan_tags(content: &str) -> Option<String> {
    const OPEN_TAG: &str = "<ralph-plan>";
    const CLOSE_TAG: &str = "</ralph-plan>";

    let start = content.find(OPEN_TAG)?;
    let body_start = start + OPEN_TAG.len();
    // `find` on the sub-slice yields an offset relative to `body_start`,
    // so rebase it onto `content` before computing the end of the element.
    let close_start = body_start + content[body_start..].find(CLOSE_TAG)?;
    let xml_end = close_start + CLOSE_TAG.len();

    Some(unescape_json_strings_aggressive(&content[start..xml_end]))
}
/// Strategy for output that, once trimmed, begins directly with the plan's
/// opening tag.
struct DirectXmlStrategy;

impl XmlExtractionStrategy for DirectXmlStrategy {
    /// Trims surrounding whitespace from `content`; if the remainder starts
    /// with `<ralph-plan>` it is handed to `extract_plan_tags`, otherwise the
    /// strategy yields `None`.
    fn extract(&self, content: &str) -> Option<String> {
        let body = content.trim();
        if !body.starts_with("<ralph-plan>") {
            return None;
        }
        extract_plan_tags(body)
    }
}
/// Strategy for plans wrapped in a Markdown code fence.
struct MarkdownFenceStrategy;

impl MarkdownFenceStrategy {
    /// Looks for a plan inside the first fence opened by `fence_marker`.
    ///
    /// The fence body is the text between the first occurrence of
    /// `fence_marker` and the next "```" after it. That body is trimmed and
    /// passed to `extract_plan_tags`.
    ///
    /// Returns `None` when the marker or the closing fence is missing, or when
    /// the body contains no plan. Only the first matching fence is inspected.
    fn extract_from_fence(content: &str, fence_marker: &str) -> Option<String> {
        let (_, remainder) = content.split_once(fence_marker)?;
        let (body, _) = remainder.split_once("```")?;
        extract_plan_tags(body.trim())
    }
}
impl XmlExtractionStrategy for MarkdownFenceStrategy {
    /// Tries an explicit "```xml" fence first, then falls back to the first
    /// plain "```" fence. The fallback is only evaluated when the first
    /// attempt yields `None`.
    fn extract(&self, content: &str) -> Option<String> {
        const FENCE_MARKERS: [&str; 2] = ["```xml", "```"];
        FENCE_MARKERS
            .iter()
            .find_map(|marker| Self::extract_from_fence(content, marker))
    }
}
struct OpenCodeStrategy;
impl OpenCodeStrategy {
fn accumulate_text(content: &str) -> String {
content
.lines()
.map(str::trim)
.filter(|line| line.starts_with('{'))
.filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
.filter(|json| json.get("type").and_then(|v| v.as_str()) == Some("text"))
.filter_map(|json| {
json.get("part")
.and_then(|p| p.get("text"))
.and_then(|v| v.as_str())
.map(str::to_string)
})
.collect()
}
}
impl XmlExtractionStrategy for OpenCodeStrategy {
    /// Joins the text events via `accumulate_text` and, when anything was
    /// collected, searches the joined text with `extract_plan_tags`.
    /// An empty accumulation yields `None` without searching.
    fn extract(&self, content: &str) -> Option<String> {
        Some(Self::accumulate_text(content))
            .filter(|joined| !joined.is_empty())
            .and_then(|joined| extract_plan_tags(&joined))
    }
}
/// Strategy for JSON lines that carry the plan inside a single string field.
struct JsonResultStrategy;

impl JsonResultStrategy {
    /// Searches `value` for the plan tags as-is; if that fails, retries on the
    /// result of `unescape_json_strings_aggressive(value)`.
    ///
    /// The unescaping pass is only performed when the first search finds
    /// nothing.
    fn try_extract_from_value(value: &str) -> Option<String> {
        if let Some(plan) = extract_plan_tags(value) {
            return Some(plan);
        }
        let unescaped = unescape_json_strings_aggressive(value);
        extract_plan_tags(&unescaped)
    }
}
impl XmlExtractionStrategy for JsonResultStrategy {
    /// Scans `content` line by line for JSON objects and returns the first
    /// plan found in one of their candidate string fields.
    ///
    /// Lines are visited in order; within a line the fields are checked in the
    /// order `result`, `content`, `message`, `output`, `text`. Lines that do
    /// not start with `{` after trimming or that fail to parse are skipped.
    fn extract(&self, content: &str) -> Option<String> {
        const CANDIDATE_FIELDS: [&str; 5] = ["result", "content", "message", "output", "text"];

        for raw_line in content.lines() {
            let candidate = raw_line.trim();
            if !candidate.starts_with('{') {
                continue;
            }
            let event: serde_json::Value = match serde_json::from_str(candidate) {
                Ok(parsed) => parsed,
                Err(_) => continue,
            };
            for field in CANDIDATE_FIELDS.iter() {
                let found = event
                    .get(*field)
                    .and_then(|value| value.as_str())
                    .and_then(Self::try_extract_from_value);
                if found.is_some() {
                    return found;
                }
            }
        }
        None
    }
}
/// Strategy that searches the whole input for the plan tags, wherever they
/// occur. It is listed last in `extract_plan_xml`, so it only runs when every
/// other strategy has returned `None`.
struct EmbeddedXmlStrategy;
impl XmlExtractionStrategy for EmbeddedXmlStrategy {
/// Runs `extract_plan_tags` on the unmodified `content`.
fn extract(&self, content: &str) -> Option<String> {
extract_plan_tags(content)
}
}
/// Extracts the plan XML from raw output text.
///
/// The extraction strategies are tried in a fixed order — direct XML,
/// Markdown fence, OpenCode text events, JSON result field, embedded XML — and
/// the first one that produces a value wins. Later strategies are not run once
/// a match is found.
///
/// Returns `None` when no strategy finds a plan.
#[must_use]
pub fn extract_plan_xml(content: &str) -> Option<String> {
    let ordered: [&dyn XmlExtractionStrategy; 5] = [
        &DirectXmlStrategy,
        &MarkdownFenceStrategy,
        &OpenCodeStrategy,
        &JsonResultStrategy,
        &EmbeddedXmlStrategy,
    ];
    ordered
        .iter()
        .find_map(|strategy| strategy.extract(content))
}
// Unit tests for the extraction strategies. Every case goes through the public
// `extract_plan_xml` entry point except the one that calls
// `OpenCodeStrategy::accumulate_text` directly.
#[cfg(test)]
mod tests {
use super::*;
// Input that is exactly one plan element: the result must equal the input.
#[test]
fn test_direct_xml_strategy() {
let content = "<ralph-plan>
<ralph-summary>Summary</ralph-summary>
<ralph-implementation-steps>1. Step</ralph-implementation-steps>
</ralph-plan>";
let result = extract_plan_xml(content);
assert!(result.is_some());
assert_eq!(result.unwrap(), content);
}
// Plan wrapped in a ```xml fence with prose before and after it.
#[test]
fn test_markdown_fence_strategy() {
let content = r"Here's the plan:
```xml
<ralph-plan>
<ralph-summary>Summary</ralph-summary>
<ralph-implementation-steps>1. Step</ralph-implementation-steps>
</ralph-plan>
```
Done!";
let result = extract_plan_xml(content);
assert!(result.is_some());
assert!(result.unwrap().contains("<ralph-plan>"));
}
// Plan split across several "text" events, surrounded by non-text events;
// the pieces must be joined back into one element.
#[test]
fn test_opencode_strategy_multiple_events() {
let content = r#"{"type":"step_start","timestamp":1234567890,"sessionID":"test","part":{"id":"1"}}
{"type":"text","timestamp":1234567891,"sessionID":"test","part":{"text":"<ralph-plan>"}}
{"type":"text","timestamp":1234567892,"sessionID":"test","part":{"text":"\n<ralph-summary>Summary from OpenCode</ralph-summary>"}}
{"type":"text","timestamp":1234567893,"sessionID":"test","part":{"text":"\n<ralph-implementation-steps>1. First step</ralph-implementation-steps>"}}
{"type":"text","timestamp":1234567894,"sessionID":"test","part":{"text":"\n</ralph-plan>"}}
{"type":"step_finish","timestamp":1234567895,"sessionID":"test","part":{"reason":"end_turn"}}"#;
let result = extract_plan_xml(content);
assert!(result.is_some());
let xml = result.unwrap();
assert!(xml.contains("<ralph-plan>"));
assert!(xml.contains("<ralph-summary>Summary from OpenCode</ralph-summary>"));
assert!(
xml.contains("<ralph-implementation-steps>1. First step</ralph-implementation-steps>")
);
assert!(xml.contains("</ralph-plan>"));
}
// Whole plan carried by a single "text" event.
#[test]
fn test_opencode_strategy_single_event() {
let content = r#"{"type":"text","timestamp":1234567891,"sessionID":"test","part":{"text":"<ralph-plan>\n<ralph-summary>Summary</ralph-summary>\n<ralph-implementation-steps>1. Step</ralph-implementation-steps>\n</ralph-plan>"}}"#;
let result = extract_plan_xml(content);
assert!(result.is_some());
let xml = result.unwrap();
assert!(xml.contains("<ralph-plan>"));
assert!(xml.contains("</ralph-plan>"));
}
// Plan stored in the top-level "result" string of a JSON line whose type is
// not "text", so the text-event accumulator collects nothing.
#[test]
fn test_json_result_strategy() {
let content = r#"{"type":"result","result":"<ralph-plan>\n<ralph-summary>Summary</ralph-summary>\n<ralph-implementation-steps>1. Step</ralph-implementation-steps>\n</ralph-plan>"}"#;
let result = extract_plan_xml(content);
assert!(result.is_some());
assert!(result.unwrap().contains("<ralph-plan>"));
}
// Plan embedded in plain prose: no fence and no JSON lines.
#[test]
fn test_embedded_xml_strategy() {
let content = r"Based on my analysis:
<ralph-plan>
<ralph-summary>Summary</ralph-summary>
<ralph-implementation-steps>1. Step</ralph-implementation-steps>
</ralph-plan>
That's the plan!";
let result = extract_plan_xml(content);
assert!(result.is_some());
}
// Text without any plan tags must produce `None`.
#[test]
fn test_no_xml_returns_none() {
let content = "This is just plain text without any XML tags.";
let result = extract_plan_xml(content);
assert!(result.is_none());
}
// Calls the accumulator directly: non-JSON lines and non-"text" events are
// dropped, and the remaining fragments are joined with no separator.
#[test]
fn test_opencode_accumulate_ignores_non_json_and_non_text_events() {
let content = r#"not json at all
{"type":"step_start","part":{}}
{"type":"text","part":{"text":"<ralph-plan>"}}
random garbage line
{"type":"text","part":{"text":"</ralph-plan>"}}"#;
let accumulated = OpenCodeStrategy::accumulate_text(content);
assert_eq!(accumulated, "<ralph-plan></ralph-plan>");
}
// The first JSON line has a candidate field ("content") without a plan; the
// search must continue to the second line, where "result" holds the plan.
#[test]
fn test_json_result_strategy_searches_multiple_fields() {
let content = r#"{"type":"metadata","content":"not xml"}
{"type":"result","result":"<ralph-plan><ralph-summary>Found in result</ralph-summary></ralph-plan>"}"#;
let result = extract_plan_xml(content);
assert!(result.is_some());
assert!(result.unwrap().contains("Found in result"));
}
}