1use alloc::{format, string::String, vec, vec::Vec};
2use regex::Regex;
3
/// Sentinel returned by the index helpers in this file when a pattern has no match.
///
/// It is `usize::MAX`, so callers must compare against it explicitly before using the
/// returned value in arithmetic or as a slice bound.
pub static NO_INDEX: usize = usize::MAX;
6
/// Optional settings shared by the tag-finding functions in this file.
///
/// Every field is optional; each function applies its own default when a field is `None`.
#[derive(Debug, Default, Clone)]
pub struct XMLOptions {
    /// Byte offset into the searched text at which the search starts (treated as `0` when
    /// `None`).
    pub start_index: Option<usize>,
    /// Whether same-named tags nested inside a match are taken into account.
    ///
    /// `xml_find_tag_by_name` only balances nested openings/closings when this is
    /// `Some(true)`; `xml_find_tags_by_name` treats `None` as `true` and then also reports
    /// tags located inside an earlier match.
    pub nested: Option<bool>,
    /// Read by `xml_find_tags_by_path`: when `Some(true)`, the search may return as soon as
    /// the first tag matching the final path step is found.
    pub return_on_first: Option<bool>,
}
19
/// A tag located inside a piece of XML text.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct XMLTag {
    /// Text between the first `>` and the last `<` of `outer`; `None` for a self-closing tag.
    pub inner: Option<String>,
    /// The complete text of the tag, from its opening `<` through its final `>`.
    pub outer: String,
    /// Byte offset of the tag's opening `<` in the searched text.
    pub start: usize,
    /// Byte offset one past the tag's final `>` in the searched text.
    pub end: usize,
}
/// Input accepted by `xml_get_attribute`: either raw XML text or an already-located tag.
#[derive(Debug, Clone, PartialEq)]
pub enum XMLTagItem {
    /// Raw XML text; the attribute lookup inspects it up to its first `>`.
    String(String),
    /// A previously found tag; the attribute lookup inspects its `outer` text.
    XMLTag(XMLTag),
}
/// One step of an [`XMLPath`]: a tag name plus an optional position filter.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct XMLStep {
    /// Name of the tag to look for at this step.
    pub name: String,
    /// Zero-based position of the single match to keep at this step; `None` keeps all matches.
    pub index: Option<usize>,
}
/// An element of an [`XMLPath`].
#[derive(Debug, Clone, PartialEq)]
pub enum XMLPathItem {
    /// A bare tag name; handled like an [`XMLStep`] whose `index` is `None`.
    String(String),
    /// A tag name together with an optional index.
    XMLStep(XMLStep),
}
/// A sequence of steps, outermost tag first, consumed by the `*_by_path` functions.
pub type XMLPath = Vec<XMLPathItem>;
58
59pub fn xml_count_substring(string: &str, substring: &str) -> usize {
61 let re = Regex::new(substring).unwrap();
62 re.find_iter(string).count()
63}
64
65pub fn xml_find_tag_by_name(
67 xml: &str,
68 tag_name: &str,
69 options: Option<XMLOptions>,
70) -> Option<XMLTag> {
71 let options = options.unwrap_or_default();
72 let nested = options.nested == Some(true);
73
74 let start_index = options.start_index.unwrap_or(0);
75
76 let start = xml_index_of_match(xml, &format!("<{tag_name}[ \n>/]"), start_index);
78 if start == NO_INDEX {
79 return None;
80 }
81
82 let after_start = &xml[start + tag_name.len()..]; let mut relative_end = xml_index_of_match_end(after_start, "^[^<]*[ /]>", 0);
85 let rel_end_char = after_start.chars().nth(relative_end - 1).unwrap_or('\0');
86 let self_closing = relative_end != NO_INDEX && rel_end_char == '/';
87
88 if !self_closing {
89 if nested {
91 let mut start_index = 0;
92 let mut openings = 1;
93 let mut closings = 0;
94 while {
95 relative_end =
96 xml_index_of_match_end(after_start, &format!("[ /]{tag_name}>"), start_index);
97 relative_end != NO_INDEX
98 } {
99 let clip = &after_start[start_index..relative_end + 1];
100 openings += xml_count_substring(clip, &format!("<{tag_name}[ \n\t>]"));
101 closings += xml_count_substring(clip, &format!("</{tag_name}>"));
102 if closings >= openings {
104 break;
105 }
106 start_index = relative_end;
107 }
108 } else {
109 relative_end = xml_index_of_match_end(after_start, &format!("[ /]{tag_name}>"), 0);
110 }
111 }
112
113 let end = start + tag_name.len() + relative_end + 1;
114 if end == NO_INDEX {
115 return None;
116 }
117
118 let outer = &xml[start..end]; let inner: Option<String> = if self_closing {
122 None
123 } else {
124 let start_pos = outer.find('>').unwrap_or(0);
125 let end_pos = outer.rfind('<').unwrap_or(outer.len());
126 Some(outer[start_pos + 1..end_pos].into())
127 };
128
129 Some(XMLTag { inner, outer: outer.into(), start, end })
130}
131
132pub fn xml_find_tag_by_path(
134 xml: &str,
135 path: &XMLPath,
136 options: Option<XMLOptions>,
137) -> Option<XMLTag> {
138 let found = xml_find_tags_by_path(
139 xml,
140 path,
141 Some(XMLOptions { return_on_first: Some(true), ..options.unwrap_or_default() }),
142 );
143
144 found.into_iter().next()
145}
146
147pub fn xml_find_tags_by_name(
157 xml: &str,
158 tag_name: &str,
159 options: Option<XMLOptions>,
160) -> Vec<XMLTag> {
161 let options = options.unwrap_or_default();
162 let nested = options.nested.unwrap_or(true);
163 let mut start_index = options.start_index.unwrap_or(0);
164 let mut tags = vec![];
165 loop {
166 let tag = xml_find_tag_by_name(
167 xml,
168 tag_name,
169 Some(XMLOptions { start_index: Some(start_index), ..options }),
170 );
171 if let Some(tag) = tag {
172 if nested {
173 start_index = tag.start + 1 + tag_name.len();
174 } else {
175 start_index = tag.end;
176 }
177 tags.push(tag);
178 } else {
179 break;
180 }
181 }
182
183 tags
184}
185
186pub fn xml_find_tags_by_path(
188 xml: &str,
189 path: &XMLPath,
190 options: Option<XMLOptions>,
191) -> Vec<XMLTag> {
192 let options = options.unwrap_or_default();
193 let return_on_first = options.return_on_first.unwrap_or(false);
194
195 if path.is_empty() {
196 return vec![];
197 }
198
199 let path0 = match &path[0] {
201 XMLPathItem::String(name) => XMLStep { name: name.clone(), index: None },
202 XMLPathItem::XMLStep(step) => step.clone(),
203 };
204
205 let mut tags = xml_find_tags_by_name(
207 xml,
208 &path0.name,
209 Some(XMLOptions { nested: Some(false), ..options }),
210 );
211
212 if let Some(index) = path0.index {
214 tags = tags.get(index).cloned().into_iter().collect();
215 }
216
217 let path = &path[1..];
218
219 for (path_index, part) in path.iter().enumerate() {
220 let part = match part {
221 XMLPathItem::String(name) => XMLStep { name: name.clone(), index: None },
222 XMLPathItem::XMLStep(step) => step.clone(),
223 };
224
225 let mut all_sub_tags = Vec::new();
226
227 for tag in &tags {
228 let mut sub_tags = xml_find_tags_by_name(
229 &tag.outer,
230 &part.name,
231 Some(XMLOptions { start_index: Some(1), ..options }),
232 );
233
234 for sub_tag in &mut sub_tags {
236 sub_tag.start += tag.start;
237 sub_tag.end += tag.start;
238 }
239
240 if return_on_first && path_index == path.len() - 1 && !sub_tags.is_empty() {
242 return vec![sub_tags.remove(0)];
243 }
244
245 all_sub_tags.extend(sub_tags);
246 }
247
248 tags = all_sub_tags;
249
250 if let Some(index) = part.index {
252 tags = tags.get(index).cloned().into_iter().collect();
253 }
254 }
255
256 tags
257}
258
259pub fn xml_get_attribute(tag: &XMLTagItem, attribute_name: &str) -> Option<String> {
261 let xml = match tag {
262 XMLTagItem::String(s) => s,
263 XMLTagItem::XMLTag(t) => &t.outer,
264 };
265
266 if let Some(end) = xml.find('>') {
268 let opening = &xml[..=end];
269
270 let quote_chars = ['"', '\''];
271 for "e in "e_chars {
272 let pattern = format!(r#"{attribute_name}={quote}([^{quote}]*){quote}"#);
273 let re = Regex::new(&pattern).ok()?;
274 if let Some(captures) = re.captures(opening) {
275 return captures.get(1).map(|m| m.as_str().into());
276 }
277 }
278 }
279 None
280}
281
282pub fn xml_index_of_match_end(xml: &str, pattern: &str, start_index: usize) -> usize {
284 let re = Regex::new(pattern).unwrap();
286 let mtch: Vec<(usize, &str)> = re
287 .captures_iter(&xml[start_index..])
288 .map(|cap| (cap.get(0).unwrap().start(), cap.get(0).unwrap().as_str()))
289 .collect();
290 if !mtch.is_empty() { start_index + mtch[0].0 + mtch[0].1.len() - 1 } else { NO_INDEX }
291}
292
293pub fn xml_index_of_match(xml: &str, pattern: &str, start_index: usize) -> usize {
303 let re = Regex::new(pattern).unwrap();
305 let mtch: Vec<(usize, &str)> = re
306 .captures_iter(&xml[start_index..])
307 .map(|cap| (cap.get(0).unwrap().start(), cap.get(0).unwrap().as_str()))
308 .collect();
309 if !mtch.is_empty() { start_index + mtch[0].0 } else { NO_INDEX }
310}
311
/// Returns a copy of `xml` with every `<!-- ... -->` comment removed.
///
/// A comment starts at `<!--` and ends at the first following `-->`, so empty comments
/// (`<!---->`) and comments whose text ends in extra dashes (`<!-- x --->`) are handled.
/// Other `<!...>` constructs such as `<!DOCTYPE ...>` are left untouched.
///
/// If a comment is never closed, everything from its `<!--` to the end of the input is
/// dropped.
pub fn xml_remove_comments(xml: &str) -> String {
    const OPEN: &str = "<!--";
    const CLOSE: &str = "-->";

    let mut result = String::with_capacity(xml.len());
    let mut rest = xml;

    while let Some(open_at) = rest.find(OPEN) {
        // Keep the text in front of the comment.
        result.push_str(&rest[..open_at]);

        let body = &rest[open_at + OPEN.len()..];
        rest = match body.find(CLOSE) {
            Some(close_at) => &body[close_at + CLOSE.len()..],
            // Unterminated comment: nothing after it is kept.
            None => "",
        };
    }

    result.push_str(rest);
    result
}
345
346pub fn xml_remove_tags_by_name(xml: &str, tag_name: &str, options: Option<XMLOptions>) -> String {
348 let mut res: String = xml.into();
349 loop {
350 let tag = xml_find_tag_by_name(&res, tag_name, options.as_ref().cloned());
351 if let Some(tag) = tag {
352 res = format!("{}{}", &res[0..tag.start], &res[tag.end..]);
353 } else {
354 break;
355 }
356 }
357 res
358}