xml_disassembler/parsers/
parse_xml.rs1use serde_json::Value;
4use tokio::fs;
5
6use crate::parsers::parse_xml_cdata;
7use crate::parsers::strip_whitespace_text_nodes;
8use crate::types::XmlElement;
9
10pub async fn parse_xml(file_path: &str) -> Option<XmlElement> {
12 let content = match fs::read_to_string(file_path).await {
13 Ok(c) => c,
14 Err(e) => {
15 log::error!(
16 "{} was unable to be parsed and will not be processed. Confirm formatting and try again.",
17 file_path
18 );
19 log::debug!("Parse error: {}", e);
20 return None;
21 }
22 };
23 parse_xml_from_str(&content, file_path)
24}
25
26pub fn parse_xml_from_str(content: &str, file_path: &str) -> Option<XmlElement> {
29 let parsed: Value = match parse_xml_cdata::parse_xml_with_cdata(content) {
30 Ok(v) => v,
31 Err(e) => {
32 log::error!(
33 "{} was unable to be parsed and will not be processed. Confirm formatting and try again.",
34 file_path
35 );
36 log::debug!("Parse error: {}", e);
37 return None;
38 }
39 };
40
41 let cleaned = strip_whitespace_text_nodes(&parsed);
42 Some(cleaned)
43}
44
45pub fn extract_xmlns_from_raw(xml_content: &str) -> Option<String> {
48 let re = regex::Regex::new(r#"xmlns="([^"]*)""#).ok()?;
49 re.captures(xml_content).map(|c| c[1].to_string())
50}
51
52pub fn extract_xml_declaration_from_raw(xml_content: &str) -> Option<XmlElement> {
56 let decl_re = regex::Regex::new(r#"<\?xml\s+([^?]+)\?>"#).ok()?;
57 let decl_content = decl_re.captures(xml_content)?.get(1)?.as_str();
58 let mut decl = serde_json::Map::new();
59 let version_re = regex::Regex::new(r#"version="([^"]*)""#).ok()?;
60 if let Some(cap) = version_re.captures(decl_content) {
61 decl.insert("@version".to_string(), Value::String(cap[1].to_string()));
62 } else {
63 return None;
64 }
65 let encoding_re = regex::Regex::new(r#"encoding="([^"]*)""#).ok()?;
66 if let Some(cap) = encoding_re.captures(decl_content) {
67 decl.insert("@encoding".to_string(), Value::String(cap[1].to_string()));
68 }
69 let standalone_re = regex::Regex::new(r#"standalone="([^"]*)""#).ok()?;
70 if let Some(cap) = standalone_re.captures(decl_content) {
71 decl.insert("@standalone".to_string(), Value::String(cap[1].to_string()));
72 }
73 Some(Value::Object(decl))
74}
75
#[cfg(test)]
mod tests {
    // Unit tests for the raw-text helpers that pull the default namespace and the
    // XML declaration out of unparsed XML.
    use super::*;

    #[test]
    fn extract_xmlns_from_raw_finds_namespace() {
        let xml = r#"<root xmlns="http://soap.sforce.com/2006/04/metadata"><a/></root>"#;
        assert_eq!(
            extract_xmlns_from_raw(xml),
            Some("http://soap.sforce.com/2006/04/metadata".to_string())
        );
    }

    #[test]
    fn extract_xmlns_from_raw_returns_none_when_absent() {
        let xml = r#"<root><a/></root>"#;
        assert_eq!(extract_xmlns_from_raw(xml), None);
    }

    // A prefixed declaration (`xmlns:xsi`) is not the default namespace.
    #[test]
    fn extract_xmlns_from_raw_ignores_prefixed_namespace_declarations() {
        let xml = r#"<root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><a/></root>"#;
        assert_eq!(extract_xmlns_from_raw(xml), None);
    }

    #[test]
    fn extract_xml_declaration_from_raw_parses_version_and_encoding() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?><root/>"#;
        let decl = extract_xml_declaration_from_raw(xml).unwrap();
        let obj = decl.as_object().unwrap();
        assert_eq!(obj.get("@version").and_then(|v| v.as_str()), Some("1.0"));
        assert_eq!(obj.get("@encoding").and_then(|v| v.as_str()), Some("UTF-8"));
    }

    #[test]
    fn extract_xml_declaration_from_raw_parses_standalone() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><root/>"#;
        let decl = extract_xml_declaration_from_raw(xml).unwrap();
        let obj = decl.as_object().unwrap();
        assert_eq!(obj.get("@standalone").and_then(|v| v.as_str()), Some("yes"));
    }

    // Only the attributes actually written in the declaration are emitted.
    #[test]
    fn extract_xml_declaration_from_raw_omits_absent_optional_attributes() {
        let xml = r#"<?xml version="1.0"?><root/>"#;
        let decl = extract_xml_declaration_from_raw(xml).unwrap();
        let obj = decl.as_object().unwrap();
        assert_eq!(obj.get("@version").and_then(|v| v.as_str()), Some("1.0"));
        assert!(obj.get("@encoding").is_none());
        assert!(obj.get("@standalone").is_none());
    }

    // A declaration without `version` is rejected as a whole.
    #[test]
    fn extract_xml_declaration_from_raw_returns_none_without_version() {
        let xml = r#"<?xml encoding="UTF-8"?><root/>"#;
        assert!(extract_xml_declaration_from_raw(xml).is_none());
    }

    #[test]
    fn extract_xml_declaration_from_raw_returns_none_without_declaration() {
        let xml = r#"<root/>"#;
        assert!(extract_xml_declaration_from_raw(xml).is_none());
    }
}