1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
/// The pseudo-attributes carried by an XML declaration (`<?xml ... ?>`).
///
/// Every field is optional; a field is `None` when the declaration does not
/// contain the corresponding pseudo-attribute.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct XmlDeclaration {
    /// Raw value of the `version` pseudo-attribute, if present.
    pub version: Option<String>,
    /// Raw value of the `encoding` pseudo-attribute, if present.
    pub encoding: Option<String>,
    /// Raw value of the `standalone` pseudo-attribute, if present.
    pub standalone: Option<String>,
}
11
/// A single `name="value"` pair taken from a tag or an XML declaration.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct XmlAttribute {
    /// Attribute name exactly as written in the source text.
    pub name: String,
    /// Text between the quotes, stored verbatim (entities are not decoded).
    pub value: String,
}
18
19#[derive(Debug, Clone, PartialEq, Eq)]
21pub struct XmlElement {
22 pub name: String,
23 pub attributes: Vec<XmlAttribute>,
24}
25
26pub fn looks_like_xml(input: &str) -> bool {
28 has_xml_declaration(input) || extract_root_element(input).is_some()
29}
30
/// Reports whether `input`, ignoring leading whitespace, begins with an XML
/// declaration (`<?xml ...`).
///
/// The processing-instruction target must be exactly `xml`. Instructions whose
/// target merely starts with those letters, such as `<?xml-stylesheet ...?>`,
/// are not declarations and yield `false`.
pub fn has_xml_declaration(input: &str) -> bool {
    let Some(rest) = input.trim_start().strip_prefix("<?xml") else {
        return false;
    };

    // The target ends here only if the next character cannot continue a name:
    // end of input, whitespace, or the `?` that starts the closing `?>`.
    match rest.chars().next() {
        None => true,
        Some(next) => next.is_whitespace() || next == '?',
    }
}
35
36pub fn extract_xml_declaration(input: &str) -> Option<XmlDeclaration> {
38 let trimmed = input.trim_start();
39 if !trimmed.starts_with("<?xml") {
40 return None;
41 }
42
43 let end = trimmed.find("?>")?;
44 let declaration = &trimmed[5..end];
45 let attributes = parse_attributes_fragment(declaration);
46
47 Some(XmlDeclaration {
48 version: attributes
49 .iter()
50 .find(|attribute| attribute.name == "version")
51 .map(|attribute| attribute.value.clone()),
52 encoding: attributes
53 .iter()
54 .find(|attribute| attribute.name == "encoding")
55 .map(|attribute| attribute.value.clone()),
56 standalone: attributes
57 .iter()
58 .find(|attribute| attribute.name == "standalone")
59 .map(|attribute| attribute.value.clone()),
60 })
61}
62
/// Returns `input` with a leading XML declaration removed.
///
/// When the text (ignoring leading whitespace) starts with a complete
/// `<?xml ... ?>` declaration, the slice following the closing `?>` is
/// returned. In every other case `input` is returned unchanged, leading
/// whitespace included. That covers text with no declaration, an unterminated
/// declaration, and a different processing instruction such as
/// `<?xml-stylesheet ...?>`.
pub fn strip_xml_declaration(input: &str) -> &str {
    let Some(rest) = input.trim_start().strip_prefix("<?xml") else {
        return input;
    };

    // A name character right after `xml` means the target is longer than
    // `xml`, so this is not the declaration and must be kept.
    if matches!(rest.chars().next(), Some(next) if !next.is_whitespace() && next != '?') {
        return input;
    }

    match rest.find("?>") {
        Some(end) => &rest[end + 2..],
        None => input,
    }
}
76
77pub fn extract_root_element(input: &str) -> Option<XmlElement> {
79 let candidate = strip_leading_xml_misc(strip_xml_declaration(input));
80 let start = candidate.find('<')?;
81 let tag_text = read_start_tag(&candidate[start + 1..])?;
82 let tag = tag_text.trim().trim_end_matches('/').trim();
83
84 if tag.is_empty() || tag.starts_with('/') {
85 return None;
86 }
87
88 let mut parts = tag.splitn(2, char::is_whitespace);
89 let name = parts.next()?.trim();
90 if name.is_empty() {
91 return None;
92 }
93
94 let attributes = parse_attributes_fragment(parts.next().unwrap_or_default());
95
96 Some(XmlElement {
97 name: name.to_string(),
98 attributes,
99 })
100}
101
102pub fn extract_attributes(element: &str) -> Vec<XmlAttribute> {
104 let mut tag = element.trim();
105 if let Some(stripped) = tag.strip_prefix('<') {
106 tag = stripped;
107 }
108 if let Some(stripped) = tag.strip_suffix('>') {
109 tag = stripped;
110 }
111 tag = tag.trim().trim_end_matches('/').trim();
112
113 let mut parts = tag.splitn(2, char::is_whitespace);
114 let Some(name) = parts.next() else {
115 return Vec::new();
116 };
117
118 if name.is_empty() || name.starts_with('!') || name.starts_with('?') || name.starts_with('/') {
119 return Vec::new();
120 }
121
122 parse_attributes_fragment(parts.next().unwrap_or_default())
123}
124
125pub fn get_attribute(element: &str, name: &str) -> Option<String> {
127 extract_attributes(element)
128 .into_iter()
129 .find(|attribute| attribute.name == name)
130 .map(|attribute| attribute.value)
131}
132
133pub fn has_attribute(element: &str, name: &str) -> bool {
135 get_attribute(element, name).is_some()
136}
137
/// Escapes the five characters that have predefined entities in XML.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&apos;` respectively; every other character is copied
/// through unchanged. The result is safe to embed in element content and in
/// attribute values delimited by either quote style.
pub fn escape_xml(input: &str) -> String {
    // The output is at least as long as the input, so reserve that up front.
    let mut escaped = String::with_capacity(input.len());

    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            _ => escaped.push(ch),
        }
    }

    escaped
}
155
/// Decodes the five predefined XML entities back into their characters.
///
/// `&lt;`, `&gt;`, `&quot;`, `&apos;` and `&amp;` become `<`, `>`, `"`, `'`
/// and `&`. Any other text, including numeric character references and
/// unknown entities, is left untouched.
pub fn unescape_xml(input: &str) -> String {
    // `&amp;` must be decoded last. Decoding it first would turn `&amp;lt;`
    // into `&lt;` and then wrongly into `<` (double unescaping).
    input
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&apos;", "'")
        .replace("&amp;", "&")
}
165
/// Returns a copy of `input` with every `<!-- ... -->` comment removed.
///
/// Text outside comments is copied verbatim. If a comment is opened but never
/// closed, everything from its `<!--` to the end of the input is dropped.
pub fn strip_xml_comments(input: &str) -> String {
    let mut kept = String::new();
    let mut rest = input;

    loop {
        // No further comment opener: the tail is kept as-is.
        let Some((before, after_open)) = rest.split_once("<!--") else {
            kept.push_str(rest);
            return kept;
        };
        kept.push_str(before);

        match after_open.split_once("-->") {
            Some((_, after_close)) => rest = after_close,
            // Unterminated comment swallows the remainder of the input.
            None => return kept,
        }
    }
}
186
/// Skips whitespace, comments, processing instructions and `<!...>`
/// declarations at the start of `input`.
///
/// Returns the remaining text, starting at the first character that belongs
/// to none of those constructs. If one of them is opened but never
/// terminated, the empty string is returned.
fn strip_leading_xml_misc(mut input: &str) -> &str {
    loop {
        let head = input.trim_start();

        // Comments are tested before the generic `<!` case because they
        // share its prefix but end with `-->` rather than a bare `>`.
        let after_construct = if let Some(comment) = head.strip_prefix("<!--") {
            comment.split_once("-->").map(|(_, after)| after)
        } else if head.starts_with("<?") {
            head.split_once("?>").map(|(_, after)| after)
        } else if head.starts_with("<!") {
            head.split_once('>').map(|(_, after)| after)
        } else {
            return head;
        };

        match after_construct {
            Some(after) => input = after,
            None => return "",
        }
    }
}
218
/// Returns the text of a tag up to, but not including, its closing `>`.
///
/// `input` is expected to start just after the opening `<`. A `>` inside a
/// single- or double-quoted attribute value does not end the tag. Returns
/// `None` when no unquoted `>` is found.
fn read_start_tag(input: &str) -> Option<&str> {
    // `Some(q)` while scanning inside a value delimited by quote `q`.
    let mut open_quote: Option<char> = None;

    for (offset, current) in input.char_indices() {
        match (open_quote, current) {
            (Some(quote), ch) if ch == quote => open_quote = None,
            (Some(_), _) => {}
            (None, '"' | '\'') => open_quote = Some(current),
            (None, '>') => return Some(&input[..offset]),
            (None, _) => {}
        }
    }

    None
}
242
243fn parse_attributes_fragment(fragment: &str) -> Vec<XmlAttribute> {
244 let mut attributes = Vec::new();
245 let bytes = fragment.as_bytes();
246 let mut index = 0;
247
248 while index < bytes.len() {
249 while index < bytes.len() && bytes[index].is_ascii_whitespace() {
250 index += 1;
251 }
252
253 if index >= bytes.len() || bytes[index] == b'/' {
254 break;
255 }
256
257 let name_start = index;
258 while index < bytes.len()
259 && !bytes[index].is_ascii_whitespace()
260 && bytes[index] != b'='
261 && bytes[index] != b'/'
262 {
263 index += 1;
264 }
265
266 if name_start == index {
267 break;
268 }
269
270 let name = &fragment[name_start..index];
271
272 while index < bytes.len() && bytes[index].is_ascii_whitespace() {
273 index += 1;
274 }
275
276 if index >= bytes.len() || bytes[index] != b'=' {
277 break;
278 }
279 index += 1;
280
281 while index < bytes.len() && bytes[index].is_ascii_whitespace() {
282 index += 1;
283 }
284
285 if index >= bytes.len() {
286 break;
287 }
288
289 let quote = bytes[index];
290 if quote != b'\'' && quote != b'"' {
291 break;
292 }
293 index += 1;
294
295 let value_start = index;
296 while index < bytes.len() && bytes[index] != quote {
297 index += 1;
298 }
299
300 if index >= bytes.len() {
301 break;
302 }
303
304 attributes.push(XmlAttribute {
305 name: name.to_string(),
306 value: fragment[value_start..index].to_string(),
307 });
308
309 index += 1;
310 }
311
312 attributes
313}