hard_xml/
xml_unescape.rs

1use memchr::memchr;
2use std::borrow::Cow;
3use std::char;
4
5use crate::{XmlError, XmlResult};
6
7pub fn xml_unescape<'a>(raw: &'a str) -> XmlResult<Cow<'a, str>> {
8    let bytes = raw.as_bytes();
9
10    if let Some(i) = memchr(b'&', bytes) {
11        let mut result = String::with_capacity(raw.len());
12
13        result.push_str(&raw[0..i]);
14
15        let mut pos = i + 1;
16
17        if let Some(i) = memchr(b';', &bytes[pos..]) {
18            recognize(&raw[pos..pos + i], &mut result)?;
19
20            pos += i + 1;
21        } else {
22            return Err(XmlError::UnterminatedEntity {
23                entity: String::from(&raw[pos - 1..]),
24            });
25        }
26
27        while let Some(i) = memchr(b'&', &bytes[pos..]) {
28            result.push_str(&raw[pos..pos + i]);
29
30            pos += i + 1;
31
32            if let Some(i) = memchr(b';', &bytes[pos..]) {
33                recognize(&raw[pos..pos + i], &mut result)?;
34
35                pos += i + 1;
36            } else {
37                return Err(XmlError::UnterminatedEntity {
38                    entity: String::from(&raw[pos - 1..]),
39                });
40            }
41        }
42
43        result.push_str(&raw[pos..]);
44
45        Ok(Cow::Owned(result))
46    } else {
47        Ok(Cow::Borrowed(raw))
48    }
49}
50
51fn recognize(entity: &str, result: &mut String) -> XmlResult<()> {
52    match entity {
53        "quot" => result.push('"'),
54        "apos" => result.push('\''),
55        "gt" => result.push('>'),
56        "lt" => result.push('<'),
57        "amp" => result.push('&'),
58        _ => {
59            let val = if entity.starts_with("#x") {
60                u32::from_str_radix(&entity[2..], 16).ok()
61            } else if entity.starts_with('#') {
62                u32::from_str_radix(&entity[1..], 10).ok()
63            } else {
64                None
65            };
66            match val.and_then(char::from_u32) {
67                Some(c) => result.push(c),
68                None => {
69                    return Err(XmlError::UnrecognizedSymbol {
70                        symbol: String::from(entity),
71                    })
72                }
73            }
74        }
75    }
76    Ok(())
77}
78
79#[test]
80fn test_unescape() {
81    assert_eq!(xml_unescape("test").unwrap(), "test");
82    assert_eq!(xml_unescape("&lt;test&gt;").unwrap(), "<test>");
83    assert_eq!(xml_unescape("&#x30;").unwrap(), "0");
84    assert_eq!(xml_unescape("&#48;").unwrap(), "0");
85}