// cognis_core/output_parsers/xml.rs

1//! Lightweight XML-ish parser — flat `<tag>value</tag>` extraction.
2
3use std::collections::HashMap;
4
5use async_trait::async_trait;
6
7use crate::output_parsers::OutputParser;
8use crate::runnable::{Runnable, RunnableConfig};
9use crate::{CognisError, Result};
10
/// Output parser that collects flat `<tag>value</tag>` pairs into a map.
///
/// Only a single level of tags is understood; nested markup is not turned
/// into structure. Reach for [`crate::output_parsers::JsonParser`] when the
/// output needs to be structured.
#[derive(Clone, Copy, Debug, Default)]
pub struct XmlParser;

impl XmlParser {
    /// Creates an `XmlParser`. The type is a unit struct and carries no state.
    pub fn new() -> Self {
        XmlParser
    }
}
24
25impl OutputParser<HashMap<String, String>> for XmlParser {
26    fn parse(&self, text: &str) -> Result<HashMap<String, String>> {
27        let mut out = HashMap::new();
28        let bytes = text.as_bytes();
29        let mut i = 0;
30        while i < bytes.len() {
31            if bytes[i] != b'<' {
32                i += 1;
33                continue;
34            }
35            let open_start = i + 1;
36            let open_end = match bytes[open_start..].iter().position(|&b| b == b'>') {
37                Some(p) => open_start + p,
38                None => break,
39            };
40            let tag = std::str::from_utf8(&bytes[open_start..open_end])
41                .map_err(|e| CognisError::Serialization(e.to_string()))?;
42            if tag.starts_with('/') || tag.is_empty() {
43                i = open_end + 1;
44                continue;
45            }
46            let close_marker = format!("</{tag}>");
47            let value_start = open_end + 1;
48            let close_pos = match text[value_start..].find(&close_marker) {
49                Some(p) => value_start + p,
50                None => {
51                    i = open_end + 1;
52                    continue;
53                }
54            };
55            let value = text[value_start..close_pos].trim().to_string();
56            out.insert(tag.to_string(), value);
57            i = close_pos + close_marker.len();
58        }
59        Ok(out)
60    }
61
62    fn format_instructions(&self) -> Option<String> {
63        Some("Wrap each field in XML-style tags: `<field>value</field>`. Do not nest tags.".into())
64    }
65}
66
67#[async_trait]
68impl Runnable<String, HashMap<String, String>> for XmlParser {
69    async fn invoke(&self, input: String, _: RunnableConfig) -> Result<HashMap<String, String>> {
70        OutputParser::parse(self, &input)
71    }
72    fn name(&self) -> &str {
73        "XmlParser"
74    }
75}
76
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` with a fresh parser, panicking if parsing fails.
    fn parsed(input: &str) -> HashMap<String, String> {
        XmlParser::new().parse(input).unwrap()
    }

    /// Adjacent sibling tags each become their own map entry.
    #[test]
    fn parses_flat_tags() {
        let fields = parsed("<name>Ada</name><role>scientist</role>");
        assert_eq!(fields.get("name").map(String::as_str), Some("Ada"));
        assert_eq!(fields.get("role").map(String::as_str), Some("scientist"));
    }

    /// Leading and trailing whitespace inside a tag is stripped from the value.
    #[test]
    fn whitespace_around_value_trimmed() {
        let fields = parsed("<x>\n  hello  \n</x>");
        assert_eq!(fields.get("x").map(String::as_str), Some("hello"));
    }

    /// An opening tag with no closing tag yields no entry and does not
    /// disturb the pairs parsed before it.
    #[test]
    fn unclosed_tag_skipped() {
        let fields = parsed("<a>1</a><b>open");
        assert_eq!(fields.get("a").map(String::as_str), Some("1"));
        assert!(fields.get("b").is_none());
    }
}
103}