Skip to main content

drasi_source_http/
content_parser.rs

1// Copyright 2025 The Drasi Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
//! Content parsing for webhook payloads.
//!
//! Supports JSON, XML, YAML, and plain text content types, with
//! automatic detection from the `Content-Type` header. A missing or
//! unrecognized header falls back to JSON.
19
20use anyhow::{anyhow, Result};
21use serde_json::Value as JsonValue;
22
/// Supported content types for webhook payloads.
#[derive(Debug, Clone, PartialEq)]
pub enum ContentType {
    /// JSON payloads (`application/json`, `text/json`); also the fallback
    /// for missing or unrecognized headers.
    Json,
    /// XML payloads (`application/xml`, `text/xml`, or a `+xml` suffix).
    Xml,
    /// YAML payloads (`application/x-yaml`, `application/yaml`,
    /// `text/yaml`, or a `+yaml` suffix).
    Yaml,
    /// Plain text payloads (`text/plain`).
    Text,
}

impl ContentType {
    /// Determine the content type from a `Content-Type` header value.
    ///
    /// Matching is case-insensitive and substring based, so parameters such
    /// as `; charset=utf-8` are tolerated. Explicit media types are checked
    /// first, in the order JSON, XML, YAML, plain text. If none of them
    /// match, the structured-syntax suffixes `+xml` and `+yaml` are honoured
    /// so that types such as `application/soap+xml` are not mis-parsed as
    /// JSON.
    ///
    /// A missing header, or a value that matches nothing above (including
    /// `+json` types), yields [`ContentType::Json`].
    pub fn from_header(header: Option<&str>) -> Self {
        /// Returns true when `haystack` contains at least one of `needles`.
        fn contains_any(haystack: &str, needles: &[&str]) -> bool {
            needles.iter().any(|needle| haystack.contains(*needle))
        }

        let Some(raw) = header else {
            return ContentType::Json;
        };

        // All patterns below are ASCII, so ASCII case folding is sufficient.
        let lower = raw.to_ascii_lowercase();

        if contains_any(&lower, &["application/json", "text/json"]) {
            ContentType::Json
        } else if contains_any(&lower, &["application/xml", "text/xml"]) {
            ContentType::Xml
        } else if contains_any(
            &lower,
            &["application/x-yaml", "text/yaml", "application/yaml"],
        ) {
            ContentType::Yaml
        } else if lower.contains("text/plain") {
            ContentType::Text
        } else if lower.contains("+xml") {
            // Structured-syntax suffix, e.g. `application/soap+xml`.
            ContentType::Xml
        } else if lower.contains("+yaml") {
            // Structured-syntax suffix, e.g. `application/vnd.example+yaml`.
            ContentType::Yaml
        } else {
            // Default to JSON for unknown types
            ContentType::Json
        }
    }
}
58
59/// Parse content body into a JSON value based on content type
60///
61/// All content types are normalized to `serde_json::Value` for uniform
62/// template processing.
63pub fn parse_content(body: &[u8], content_type: ContentType) -> Result<JsonValue> {
64    match content_type {
65        ContentType::Json => parse_json(body),
66        ContentType::Xml => parse_xml(body),
67        ContentType::Yaml => parse_yaml(body),
68        ContentType::Text => parse_text(body),
69    }
70}
71
72/// Parse JSON content
73fn parse_json(body: &[u8]) -> Result<JsonValue> {
74    serde_json::from_slice(body).map_err(|e| anyhow!("Failed to parse JSON: {e}"))
75}
76
77/// Parse YAML content and convert to JSON value
78fn parse_yaml(body: &[u8]) -> Result<JsonValue> {
79    let yaml_value: serde_yaml::Value =
80        serde_yaml::from_slice(body).map_err(|e| anyhow!("Failed to parse YAML: {e}"))?;
81    yaml_to_json(yaml_value)
82}
83
84/// Convert YAML value to JSON value
85fn yaml_to_json(yaml: serde_yaml::Value) -> Result<JsonValue> {
86    match yaml {
87        serde_yaml::Value::Null => Ok(JsonValue::Null),
88        serde_yaml::Value::Bool(b) => Ok(JsonValue::Bool(b)),
89        serde_yaml::Value::Number(n) => {
90            if let Some(i) = n.as_i64() {
91                Ok(JsonValue::Number(i.into()))
92            } else if let Some(f) = n.as_f64() {
93                Ok(serde_json::Number::from_f64(f)
94                    .map(JsonValue::Number)
95                    .unwrap_or(JsonValue::Null))
96            } else {
97                Ok(JsonValue::Null)
98            }
99        }
100        serde_yaml::Value::String(s) => Ok(JsonValue::String(s)),
101        serde_yaml::Value::Sequence(seq) => {
102            let arr: Result<Vec<JsonValue>> = seq.into_iter().map(yaml_to_json).collect();
103            Ok(JsonValue::Array(arr?))
104        }
105        serde_yaml::Value::Mapping(map) => {
106            let mut obj = serde_json::Map::new();
107            for (k, v) in map {
108                let key = match k {
109                    serde_yaml::Value::String(s) => s,
110                    serde_yaml::Value::Number(n) => n.to_string(),
111                    serde_yaml::Value::Bool(b) => b.to_string(),
112                    _ => continue,
113                };
114                obj.insert(key, yaml_to_json(v)?);
115            }
116            Ok(JsonValue::Object(obj))
117        }
118        serde_yaml::Value::Tagged(tagged) => yaml_to_json(tagged.value),
119    }
120}
121
122/// Parse XML content and convert to JSON value
123fn parse_xml(body: &[u8]) -> Result<JsonValue> {
124    let xml_str = std::str::from_utf8(body).map_err(|e| anyhow!("Invalid UTF-8 in XML: {e}"))?;
125    xml_to_json(xml_str)
126}
127
128/// Convert XML string to JSON value
129///
130/// Uses a simplified conversion where:
131/// - Elements become objects
132/// - Text content goes into a "_text" field
133/// - Attributes go into "@attribute_name" fields
134/// - Repeated elements become arrays
135fn xml_to_json(xml: &str) -> Result<JsonValue> {
136    use quick_xml::events::Event;
137    use quick_xml::Reader;
138
139    let mut reader = Reader::from_str(xml);
140    reader.config_mut().trim_text(true);
141
142    let mut stack: Vec<(String, JsonValue)> = vec![(
143        "root".to_string(),
144        JsonValue::Object(serde_json::Map::new()),
145    )];
146
147    loop {
148        match reader.read_event() {
149            Ok(Event::Start(e)) => {
150                let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
151                let mut obj = serde_json::Map::new();
152
153                // Add attributes
154                for attr in e.attributes().flatten() {
155                    let key = format!("@{}", String::from_utf8_lossy(attr.key.as_ref()));
156                    let value = String::from_utf8_lossy(&attr.value).to_string();
157                    obj.insert(key, JsonValue::String(value));
158                }
159
160                stack.push((name, JsonValue::Object(obj)));
161            }
162            Ok(Event::End(_)) => {
163                if stack.len() > 1 {
164                    if let Some((name, value)) = stack.pop() {
165                        if let Some((_, JsonValue::Object(parent_obj))) = stack.last_mut() {
166                            // Handle repeated elements by converting to array
167                            if let Some(existing) = parent_obj.get_mut(&name) {
168                                match existing {
169                                    JsonValue::Array(arr) => arr.push(value),
170                                    _ => {
171                                        let prev = existing.take();
172                                        *existing = JsonValue::Array(vec![prev, value]);
173                                    }
174                                }
175                            } else {
176                                parent_obj.insert(name, value);
177                            }
178                        }
179                    }
180                }
181            }
182            Ok(Event::Text(e)) => {
183                let text = e
184                    .unescape()
185                    .map_err(|e| anyhow!("XML unescape error: {e}"))?;
186                let text = text.trim();
187                if !text.is_empty() {
188                    if let Some((_, current)) = stack.last_mut() {
189                        if let JsonValue::Object(obj) = current {
190                            if obj.is_empty() {
191                                // If no attributes, just use the text value directly
192                                *current = JsonValue::String(text.to_string());
193                            } else {
194                                obj.insert(
195                                    "_text".to_string(),
196                                    JsonValue::String(text.to_string()),
197                                );
198                            }
199                        }
200                    }
201                }
202            }
203            Ok(Event::Empty(e)) => {
204                let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
205                let mut obj = serde_json::Map::new();
206
207                for attr in e.attributes().flatten() {
208                    let key = format!("@{}", String::from_utf8_lossy(attr.key.as_ref()));
209                    let value = String::from_utf8_lossy(&attr.value).to_string();
210                    obj.insert(key, JsonValue::String(value));
211                }
212
213                let value = if obj.is_empty() {
214                    JsonValue::Null
215                } else {
216                    JsonValue::Object(obj)
217                };
218
219                if let Some((_, JsonValue::Object(parent_obj))) = stack.last_mut() {
220                    if let Some(existing) = parent_obj.get_mut(&name) {
221                        match existing {
222                            JsonValue::Array(arr) => arr.push(value),
223                            _ => {
224                                let prev = existing.take();
225                                *existing = JsonValue::Array(vec![prev, value]);
226                            }
227                        }
228                    } else {
229                        parent_obj.insert(name, value);
230                    }
231                }
232            }
233            Ok(Event::Eof) => break,
234            Err(e) => return Err(anyhow!("XML parse error: {e}")),
235            _ => {}
236        }
237    }
238
239    // Return the root object's content
240    let Some((_, JsonValue::Object(mut root))) = stack.pop() else {
241        return Err(anyhow!("Failed to parse XML structure"));
242    };
243
244    // If there's only one child, return it directly
245    if root.len() == 1 {
246        Ok(root
247            .into_iter()
248            .next()
249            .map(|(_, v)| v)
250            .unwrap_or(JsonValue::Null))
251    } else {
252        Ok(JsonValue::Object(root))
253    }
254}
255
256/// Parse plain text content
257fn parse_text(body: &[u8]) -> Result<JsonValue> {
258    let text =
259        std::str::from_utf8(body).map_err(|e| anyhow!("Invalid UTF-8 in text content: {e}"))?;
260    Ok(JsonValue::String(text.to_string()))
261}
262
#[cfg(test)]
mod tests {
    use super::*;

    /// Header detection, including parameters, case differences and the
    /// JSON fallback for missing or unknown types.
    #[test]
    fn test_content_type_from_header() {
        let cases = [
            (Some("application/json"), ContentType::Json),
            (Some("application/json; charset=utf-8"), ContentType::Json),
            (Some("text/json"), ContentType::Json),
            (Some("APPLICATION/JSON"), ContentType::Json),
            (Some("application/xml"), ContentType::Xml),
            (Some("text/xml"), ContentType::Xml),
            (Some("Text/XML; charset=UTF-8"), ContentType::Xml),
            (Some("application/x-yaml"), ContentType::Yaml),
            (Some("application/yaml"), ContentType::Yaml),
            (Some("text/yaml"), ContentType::Yaml),
            (Some("text/plain"), ContentType::Text),
            (Some("TEXT/PLAIN"), ContentType::Text),
            (None, ContentType::Json),
            (Some("unknown/type"), ContentType::Json),
        ];

        for (header, expected) in &cases {
            assert_eq!(
                ContentType::from_header(*header),
                *expected,
                "unexpected content type for header {header:?}"
            );
        }
    }

    #[test]
    fn test_parse_json() {
        let json = r#"{"name": "test", "value": 42, "nested": {"key": "value"}}"#;
        let result = parse_content(json.as_bytes(), ContentType::Json).unwrap();

        assert_eq!(result["name"], "test");
        assert_eq!(result["value"], 42);
        assert_eq!(result["nested"]["key"], "value");
    }

    #[test]
    fn test_parse_json_array() {
        let json = r#"[1, 2, 3]"#;
        let result = parse_content(json.as_bytes(), ContentType::Json).unwrap();

        assert!(result.is_array());
        assert_eq!(result.as_array().unwrap().len(), 3);
    }

    #[test]
    fn test_parse_yaml() {
        let yaml = r#"
name: test
value: 42
nested:
  key: value
items:
  - one
  - two
"#;
        let result = parse_content(yaml.as_bytes(), ContentType::Yaml).unwrap();

        assert_eq!(result["name"], "test");
        assert_eq!(result["value"], 42);
        assert_eq!(result["nested"]["key"], "value");
        assert_eq!(result["items"][0], "one");
        assert_eq!(result["items"][1], "two");
    }

    #[test]
    fn test_parse_xml_simple() {
        let xml = r#"<root><name>test</name><value>42</value></root>"#;
        let result = parse_content(xml.as_bytes(), ContentType::Xml).unwrap();

        assert_eq!(result["name"], "test");
        assert_eq!(result["value"], "42");
    }

    #[test]
    fn test_parse_xml_with_attributes() {
        let xml = r#"<item id="123" type="test">content</item>"#;
        let result = parse_content(xml.as_bytes(), ContentType::Xml).unwrap();

        assert_eq!(result["@id"], "123");
        assert_eq!(result["@type"], "test");
        assert_eq!(result["_text"], "content");
    }

    #[test]
    fn test_parse_xml_repeated_elements() {
        let xml = r#"<root><item>one</item><item>two</item><item>three</item></root>"#;
        let result = parse_content(xml.as_bytes(), ContentType::Xml).unwrap();

        assert!(result["item"].is_array());
        let items = result["item"].as_array().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0], "one");
        assert_eq!(items[1], "two");
        assert_eq!(items[2], "three");
    }

    /// Self-closing elements: `null` without attributes, an object with them.
    #[test]
    fn test_parse_xml_empty_elements() {
        let xml = r#"<root><flag/><item id="7"/></root>"#;
        let result = parse_content(xml.as_bytes(), ContentType::Xml).unwrap();

        // `get` distinguishes an explicit null from a missing key.
        assert_eq!(result.get("flag"), Some(&JsonValue::Null));
        assert_eq!(result["item"]["@id"], "7");
    }

    #[test]
    fn test_parse_text() {
        let text = "Hello, World!";
        let result = parse_content(text.as_bytes(), ContentType::Text).unwrap();

        assert_eq!(result, JsonValue::String("Hello, World!".to_string()));
    }

    #[test]
    fn test_parse_invalid_json() {
        let invalid = "not valid json";
        let result = parse_content(invalid.as_bytes(), ContentType::Json);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_invalid_yaml() {
        let invalid = "key: [unclosed";
        let result = parse_content(invalid.as_bytes(), ContentType::Yaml);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_invalid_xml() {
        // This is malformed XML with mismatched tags
        let invalid = "<root><unclosed></root>";
        let result = parse_content(invalid.as_bytes(), ContentType::Xml);
        assert!(result.is_err());
    }
}