// edit_xml/utils/encoding.rs
use std::borrow::Cow;

use quick_xml::escape::{EscapeError, ParseCharRefError};
4
5#[inline]
6fn from_str_radix(src: &str, radix: u32) -> Result<u32, ParseCharRefError> {
7 match src.as_bytes().first().copied() {
8 Some(b'+') | Some(b'-') => Err(ParseCharRefError::UnexpectedSign),
11 _ => u32::from_str_radix(src, radix).map_err(ParseCharRefError::InvalidNumber),
12 }
13}
14fn parse_number(num: &str) -> Result<char, ParseCharRefError> {
15 let code = if let Some(hex) = num.strip_prefix('x') {
16 from_str_radix(hex, 16)?
17 } else {
18 from_str_radix(num, 10)?
19 };
20 if code == 0 {
21 return Err(ParseCharRefError::IllegalCharacter(code));
22 }
23 match std::char::from_u32(code) {
24 Some(c) => Ok(c),
25 None => Err(ParseCharRefError::InvalidCodepoint(code)),
26 }
27}
28pub fn unescape_with_and_ignore<'input, 'entity, F>(
30 raw: &'input str,
31 mut resolve_entity: F,
32) -> Result<Cow<'input, str>, EscapeError>
33where
34 F: FnMut(&str) -> Option<&'entity str>,
36{
37 let bytes = raw.as_bytes();
38 let mut unescaped = None;
39 let mut last_end = 0;
40 let mut iter = memchr::Memchr2::new(b'&', b';', bytes);
41 while let Some(start) = iter.by_ref().find(|p| bytes[*p] == b'&') {
42 match iter.next() {
43 Some(end) if bytes[end] == b';' => {
44 if unescaped.is_none() {
46 unescaped = Some(String::with_capacity(raw.len()));
47 }
48 let unescaped = unescaped.as_mut().expect("initialized");
49 unescaped.push_str(&raw[last_end..start]);
50
51 let pat = &raw[start + 1..end];
53 if let Some(entity) = pat.strip_prefix('#') {
54 let codepoint = parse_number(entity).map_err(EscapeError::InvalidCharRef)?;
55 unescaped.push_str(codepoint.encode_utf8(&mut [0u8; 4]));
56 } else if let Some(value) = resolve_entity(pat) {
57 unescaped.push_str(value);
58 } else {
59 tracing::warn!("Unknown entity: {:?}", pat);
60 unescaped.push_str(&raw[start..=end]);
61 }
62
63 last_end = end + 1;
64 }
65 _ => return Err(EscapeError::UnterminatedEntity(start..raw.len())),
66 }
67 }
68
69 if let Some(mut unescaped) = unescaped {
70 if let Some(raw) = raw.get(last_end..) {
71 unescaped.push_str(raw);
72 }
73 Ok(Cow::Owned(unescaped))
74 } else {
75 Ok(Cow::Borrowed(raw))
76 }
77}
78pub fn unescape_with<'input, 'entity, F>(
79 raw: &'input str,
80 resolve_entity: F,
81) -> Result<Cow<'input, str>, EscapeError>
82where
83 F: FnMut(&str) -> Option<&'entity str>,
85{
86 #[cfg(feature = "soft-fail-unescape")]
87 {
88 unescape_with_and_ignore(raw, resolve_entity)
89 }
90 #[cfg(not(feature = "soft-fail-unescape"))]
91 {
92 quick_xml::escape::unescape_with(raw, resolve_entity)
93 }
94}
95
#[cfg(test)]
mod tests {

    /// Parses the `bugs/oslash/oslash.xml` fixture with relaxed read options
    /// and prints every child of its `developers` element.
    ///
    /// Only compiled when `soft-fail-unescape` or `escape-html` is enabled.
    #[cfg(any(feature = "soft-fail-unescape", feature = "escape-html"))]
    #[test]
    fn oslash() -> anyhow::Result<()> {
        use std::fs::read_to_string;

        use anyhow::Context;

        use crate::{utils::tests, Document, ReadOptions};

        tests::setup_logger();

        let fixture = tests::test_dir()
            .join("bugs")
            .join("oslash")
            .join("oslash.xml");
        anyhow::ensure!(fixture.exists(), "File not found: {:?}", fixture);
        let xml = read_to_string(fixture).context("Failed to read file")?;

        let doc = Document::parse_str_with_opts(&xml, ReadOptions::relaxed()).unwrap();
        let root = doc.root_element().context("Root Element not found")?;
        let developers = root
            .find(&doc, "developers")
            .context("Developers Element not found")?;

        for child in developers.children(&doc) {
            println!("{:#?}", child.debug(&doc));
        }
        println!("Parse Successful");
        Ok(())
    }
}