1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
use memchr::memchr;
use std::borrow::Cow;
use std::char;

use crate::{XmlError, XmlResult};

/// Replaces XML entity and character references in `raw` with the characters
/// they denote.
///
/// When `raw` contains no `&` the input is returned as `Cow::Borrowed` and
/// nothing is allocated; otherwise a new `String` is built and returned as
/// `Cow::Owned`.
///
/// # Errors
///
/// * `XmlError::UnterminatedEntity` if a `&` is not followed by a `;`
///   anywhere in the rest of the input. The error carries the remainder of
///   the input starting at that `&`.
/// * `XmlError::UnrecognizedSymbol` if the text between `&` and `;` is
///   neither one of the five predefined names (`quot`, `apos`, `gt`, `lt`,
///   `amp`) nor a numeric character reference that decodes to a `char`.
pub fn xml_unescape<'a>(raw: &'a str) -> XmlResult<Cow<'a, str>> {
    let bytes = raw.as_bytes();

    // Fast path: nothing to unescape, hand the input back untouched.
    let first = match memchr(b'&', bytes) {
        Some(i) => i,
        None => return Ok(Cow::Borrowed(raw)),
    };

    let mut result = String::with_capacity(raw.len());
    // `pos` is the index of the first byte not yet copied to `result`.
    let mut pos = 0;
    // Offset (relative to `pos`) of the next `&`, if any.
    let mut next_amp = Some(first);

    while let Some(offset) = next_amp {
        let amp = pos + offset;
        // `&` and `;` are ASCII, so every index used for slicing below is a
        // valid UTF-8 character boundary.
        result.push_str(&raw[pos..amp]);

        let start = amp + 1;
        let len = match memchr(b';', &bytes[start..]) {
            Some(len) => len,
            None => {
                return Err(XmlError::UnterminatedEntity {
                    entity: String::from(&raw[amp..]),
                })
            }
        };
        recognize(&raw[start..start + len], &mut result)?;

        pos = start + len + 1;
        next_amp = memchr(b'&', &bytes[pos..]);
    }

    result.push_str(&raw[pos..]);
    Ok(Cow::Owned(result))
}

/// Decodes a single reference body (the text between `&` and `;`) and appends
/// the resulting character to `result`.
///
/// Returns `XmlError::UnrecognizedSymbol` carrying `entity` when it is neither
/// a predefined entity name nor a decodable numeric character reference.
fn recognize(entity: &str, result: &mut String) -> XmlResult<()> {
    let decoded = match entity {
        "quot" => Some('"'),
        "apos" => Some('\''),
        "gt" => Some('>'),
        "lt" => Some('<'),
        "amp" => Some('&'),
        _ => parse_char_ref(entity),
    };

    match decoded {
        Some(c) => {
            result.push(c);
            Ok(())
        }
        None => Err(XmlError::UnrecognizedSymbol {
            symbol: String::from(entity),
        }),
    }
}

/// Parses a numeric character reference body: `#` followed by decimal digits,
/// or `#x` followed by hexadecimal digits.
///
/// Returns `None` if the prefix is missing, the digits are missing or
/// malformed, the value overflows `u32`, or the value is not a valid `char`.
fn parse_char_ref(entity: &str) -> Option<char> {
    let (digits, radix) = if let Some(hex) = entity.strip_prefix("#x") {
        (hex, 16)
    } else if let Some(dec) = entity.strip_prefix('#') {
        (dec, 10)
    } else {
        return None;
    };

    // `from_str_radix` tolerates a leading `+`, which is not part of a
    // character reference, so insist on digits only before parsing. An empty
    // digit string passes this check but is rejected by `from_str_radix`.
    if !digits.chars().all(|c| c.is_digit(radix)) {
        return None;
    }

    u32::from_str_radix(digits, radix)
        .ok()
        .and_then(char::from_u32)
}

#[test]
fn test_unescape() {
    // Inputs without `&` are returned unchanged and borrowed.
    assert_eq!(xml_unescape("test").unwrap(), "test");
    assert_eq!(xml_unescape("<test>").unwrap(), "<test>");
    assert_eq!(xml_unescape("0").unwrap(), "0");
    assert!(matches!(xml_unescape("test"), Ok(Cow::Borrowed("test"))));

    // Predefined entities.
    assert_eq!(xml_unescape("&lt;test&gt;").unwrap(), "<test>");
    assert_eq!(
        xml_unescape("a &amp; b &quot;c&quot; &apos;d&apos;").unwrap(),
        "a & b \"c\" 'd'"
    );

    // Numeric character references, hexadecimal and decimal.
    assert_eq!(xml_unescape("&#x30;").unwrap(), "0");
    assert_eq!(xml_unescape("&#48;").unwrap(), "0");
    assert_eq!(xml_unescape("x&#x41;y&#66;z").unwrap(), "xAyBz");

    // Malformed or unknown references are rejected.
    assert!(matches!(
        xml_unescape("&#+48;"),
        Err(XmlError::UnrecognizedSymbol { .. })
    ));
    assert!(matches!(
        xml_unescape("&#x+30;"),
        Err(XmlError::UnrecognizedSymbol { .. })
    ));
    assert!(matches!(
        xml_unescape("&#;"),
        Err(XmlError::UnrecognizedSymbol { .. })
    ));
    assert!(matches!(
        xml_unescape("&nbsp;"),
        Err(XmlError::UnrecognizedSymbol { .. })
    ));

    // A `&` with no closing `;` is an error.
    assert!(matches!(
        xml_unescape("a &amp b"),
        Err(XmlError::UnterminatedEntity { .. })
    ));
}