// Source file: use_xml/lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
/// A conservative view of an XML declaration (`<?xml ... ?>`).
///
/// Each field carries the raw text of the pseudo-attribute with the same name, or `None`
/// when the declaration does not mention it. The `Default` value has every field unset.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct XmlDeclaration {
    /// Value of the `version` pseudo-attribute, when present.
    pub version: Option<String>,
    /// Value of the `encoding` pseudo-attribute, when present.
    pub encoding: Option<String>,
    /// Value of the `standalone` pseudo-attribute, when present (kept as written, not parsed).
    pub standalone: Option<String>,
}
11
/// A simple XML attribute: one `name="value"` pair.
///
/// Ordering and hashing are derived field by field (`name` first, then `value`), so
/// attributes can be sorted or collected into sets and maps.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct XmlAttribute {
    /// Attribute name as written in the source (prefix included, if any).
    pub name: String,
    /// Attribute value without the surrounding quotes.
    pub value: String,
}
18
19/// A simple XML start element.
20#[derive(Debug, Clone, PartialEq, Eq)]
21pub struct XmlElement {
22    pub name: String,
23    pub attributes: Vec<XmlAttribute>,
24}
25
26/// Returns `true` when the input looks like XML.
27pub fn looks_like_xml(input: &str) -> bool {
28    has_xml_declaration(input) || extract_root_element(input).is_some()
29}
30
/// Returns `true` when the input starts with an XML declaration.
///
/// Leading whitespace is ignored. The processing-instruction target must be exactly `xml`:
/// `<?xml` has to be followed by whitespace, by `?`, or by the end of the input, so other
/// processing instructions such as `<?xml-stylesheet ...?>` are not reported as declarations.
pub fn has_xml_declaration(input: &str) -> bool {
    let Some(after_target) = input.trim_start().strip_prefix("<?xml") else {
        return false;
    };

    // A following name character would make this a different target (e.g. `xml-stylesheet`).
    after_target
        .chars()
        .next()
        .map_or(true, |next| next.is_whitespace() || next == '?')
}
35
36/// Extracts the XML declaration when present.
37pub fn extract_xml_declaration(input: &str) -> Option<XmlDeclaration> {
38    let trimmed = input.trim_start();
39    if !trimmed.starts_with("<?xml") {
40        return None;
41    }
42
43    let end = trimmed.find("?>")?;
44    let declaration = &trimmed[5..end];
45    let attributes = parse_attributes_fragment(declaration);
46
47    Some(XmlDeclaration {
48        version: attributes
49            .iter()
50            .find(|attribute| attribute.name == "version")
51            .map(|attribute| attribute.value.clone()),
52        encoding: attributes
53            .iter()
54            .find(|attribute| attribute.name == "encoding")
55            .map(|attribute| attribute.value.clone()),
56        standalone: attributes
57            .iter()
58            .find(|attribute| attribute.name == "standalone")
59            .map(|attribute| attribute.value.clone()),
60    })
61}
62
/// Strips a leading XML declaration when present.
///
/// When the input (after optional leading whitespace) starts with a complete `<?xml ... ?>`
/// declaration, returns everything after the closing `?>`. In every other case the input is
/// returned unchanged, including its leading whitespace. That covers inputs with no
/// declaration, an unterminated declaration, or a different processing instruction whose
/// target merely begins with `xml`, such as `<?xml-stylesheet ...?>`.
pub fn strip_xml_declaration(input: &str) -> &str {
    let Some(after_target) = input.trim_start().strip_prefix("<?xml") else {
        return input;
    };

    // A following name character would make this a different target (e.g. `xml-stylesheet`).
    let target_is_xml = after_target
        .chars()
        .next()
        .map_or(true, |next| next.is_whitespace() || next == '?');
    if !target_is_xml {
        return input;
    }

    match after_target.find("?>") {
        Some(end) => &after_target[end + 2..],
        None => input,
    }
}
76
77/// Extracts the root element start tag when present.
78pub fn extract_root_element(input: &str) -> Option<XmlElement> {
79    let candidate = strip_leading_xml_misc(strip_xml_declaration(input));
80    let start = candidate.find('<')?;
81    let tag_text = read_start_tag(&candidate[start + 1..])?;
82    let tag = tag_text.trim().trim_end_matches('/').trim();
83
84    if tag.is_empty() || tag.starts_with('/') {
85        return None;
86    }
87
88    let mut parts = tag.splitn(2, char::is_whitespace);
89    let name = parts.next()?.trim();
90    if name.is_empty() {
91        return None;
92    }
93
94    let attributes = parse_attributes_fragment(parts.next().unwrap_or_default());
95
96    Some(XmlElement {
97        name: name.to_string(),
98        attributes,
99    })
100}
101
102/// Extracts attributes from an element start tag.
103pub fn extract_attributes(element: &str) -> Vec<XmlAttribute> {
104    let mut tag = element.trim();
105    if let Some(stripped) = tag.strip_prefix('<') {
106        tag = stripped;
107    }
108    if let Some(stripped) = tag.strip_suffix('>') {
109        tag = stripped;
110    }
111    tag = tag.trim().trim_end_matches('/').trim();
112
113    let mut parts = tag.splitn(2, char::is_whitespace);
114    let Some(name) = parts.next() else {
115        return Vec::new();
116    };
117
118    if name.is_empty() || name.starts_with('!') || name.starts_with('?') || name.starts_with('/') {
119        return Vec::new();
120    }
121
122    parse_attributes_fragment(parts.next().unwrap_or_default())
123}
124
125/// Returns the named attribute from an element when present.
126pub fn get_attribute(element: &str, name: &str) -> Option<String> {
127    extract_attributes(element)
128        .into_iter()
129        .find(|attribute| attribute.name == name)
130        .map(|attribute| attribute.value)
131}
132
133/// Returns `true` when an element has the named attribute.
134pub fn has_attribute(element: &str, name: &str) -> bool {
135    get_attribute(element, name).is_some()
136}
137
/// Escapes text for use as XML content.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`.
/// Every other character is copied through unchanged.
pub fn escape_xml(input: &str) -> String {
    input
        .chars()
        .fold(String::with_capacity(input.len()), |mut out, ch| {
            match ch {
                '&' => out.push_str("&amp;"),
                '<' => out.push_str("&lt;"),
                '>' => out.push_str("&gt;"),
                '"' => out.push_str("&quot;"),
                '\'' => out.push_str("&apos;"),
                other => out.push(other),
            }
            out
        })
}
155
/// Unescapes the five predefined XML entities.
///
/// `&lt;`, `&gt;`, `&quot;`, `&apos;` and `&amp;` are replaced by the characters they stand
/// for. Anything else, including numeric character references, is left untouched.
pub fn unescape_xml(input: &str) -> String {
    // `&amp;` must stay last: decoding it earlier would turn `&amp;lt;` into `&lt;` and then
    // wrongly into `<`.
    const ENTITIES: [(&str, &str); 5] = [
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&quot;", "\""),
        ("&apos;", "'"),
        ("&amp;", "&"),
    ];

    ENTITIES
        .iter()
        .fold(input.to_string(), |text, &(entity, plain)| {
            text.replace(entity, plain)
        })
}
165
/// Removes every `<!-- ... -->` comment from the input.
///
/// Text outside comments is copied through unchanged. If a comment is opened but never
/// closed, everything from its `<!--` to the end of the input is dropped.
pub fn strip_xml_comments(input: &str) -> String {
    let mut kept = String::new();
    let mut tail = input;

    loop {
        let Some((before, after_open)) = tail.split_once("<!--") else {
            // No further comments: the rest is plain content.
            kept.push_str(tail);
            return kept;
        };
        kept.push_str(before);

        match after_open.split_once("-->") {
            Some((_comment, after_close)) => tail = after_close,
            // Unterminated comment swallows the remainder.
            None => return kept,
        }
    }
}
186
/// Skips whitespace, comments, processing instructions and `<!...>` declarations at the
/// start of `input` and returns what follows.
///
/// A `<!...>` declaration is considered to end at its first `>`. If any of these constructs
/// is left unterminated, an empty string is returned.
fn strip_leading_xml_misc(mut input: &str) -> &str {
    loop {
        let head = input.trim_start();

        // Pick the text to search and the terminator to look for. The comment case must be
        // tested before the generic `<!` case, and it searches after the `<!--` opener.
        let (body, terminator) = if let Some(comment) = head.strip_prefix("<!--") {
            (comment, "-->")
        } else if head.starts_with("<?") {
            (head, "?>")
        } else if head.starts_with("<!") {
            (head, ">")
        } else {
            return head;
        };

        match body.find(terminator) {
            Some(at) => input = &body[at + terminator.len()..],
            None => return "",
        }
    }
}
218
/// Returns the text of a tag up to (not including) its closing `>`.
///
/// `input` is expected to start just after the opening `<`. A `>` inside a single- or
/// double-quoted section does not end the tag. Returns `None` when no unquoted `>` exists.
fn read_start_tag(input: &str) -> Option<&str> {
    let mut open_quote: Option<char> = None;

    for (offset, current) in input.char_indices() {
        match (open_quote, current) {
            // Inside a quoted value: only the matching quote character closes it.
            (Some(quote), ch) if ch == quote => open_quote = None,
            (Some(_), _) => {}
            (None, '"' | '\'') => open_quote = Some(current),
            (None, '>') => return Some(&input[..offset]),
            (None, _) => {}
        }
    }

    None
}
242
243fn parse_attributes_fragment(fragment: &str) -> Vec<XmlAttribute> {
244    let mut attributes = Vec::new();
245    let bytes = fragment.as_bytes();
246    let mut index = 0;
247
248    while index < bytes.len() {
249        while index < bytes.len() && bytes[index].is_ascii_whitespace() {
250            index += 1;
251        }
252
253        if index >= bytes.len() || bytes[index] == b'/' {
254            break;
255        }
256
257        let name_start = index;
258        while index < bytes.len()
259            && !bytes[index].is_ascii_whitespace()
260            && bytes[index] != b'='
261            && bytes[index] != b'/'
262        {
263            index += 1;
264        }
265
266        if name_start == index {
267            break;
268        }
269
270        let name = &fragment[name_start..index];
271
272        while index < bytes.len() && bytes[index].is_ascii_whitespace() {
273            index += 1;
274        }
275
276        if index >= bytes.len() || bytes[index] != b'=' {
277            break;
278        }
279        index += 1;
280
281        while index < bytes.len() && bytes[index].is_ascii_whitespace() {
282            index += 1;
283        }
284
285        if index >= bytes.len() {
286            break;
287        }
288
289        let quote = bytes[index];
290        if quote != b'\'' && quote != b'"' {
291            break;
292        }
293        index += 1;
294
295        let value_start = index;
296        while index < bytes.len() && bytes[index] != quote {
297            index += 1;
298        }
299
300        if index >= bytes.len() {
301            break;
302        }
303
304        attributes.push(XmlAttribute {
305            name: name.to_string(),
306            value: fragment[value_start..index].to_string(),
307        });
308
309        index += 1;
310    }
311
312    attributes
313}