1use std::collections::HashMap;
4
5use quick_xml::{
6 events::{BytesStart, Event},
7 Reader,
8};
9
10use rquickjs::{
11 class::{Trace, Tracer},
12 function::Opt,
13 module::{Declarations, Exports, ModuleDef},
14 object::Property,
15 prelude::This,
16 Array, Class, Ctx, Error, Function, IntoJs, Object, Result, Value,
17};
18
// XML entity references written by `escape_attribute` and `escape_element`.
// Each constant is the escaped replacement for one reserved or line-breaking
// character. They must be the entity text, never the raw character itself,
// otherwise escaping silently becomes a no-op.
const AMP: &str = "&amp;";
const LT: &str = "&lt;";
const GT: &str = "&gt;";
const QUOT: &str = "&quot;";
const APOS: &str = "&apos;";
const CR: &str = "&#x0D;";
const LF: &str = "&#x0A;";
const NEL: &str = "&#x85;";
const LS: &str = "&#x2028;";
28
29use crate::{
30 modules::module::export_default,
32 utils::{
33 object::{get_bytes, ObjectExt},
34 result::ResultExt,
35 string::JoinToString,
36 },
37};
38
39#[rquickjs::class]
40struct XMLParser<'js> {
41 tag_value_processor: Option<Function<'js>>,
42 attribute_value_processor: Option<Function<'js>>,
43 attribute_name_prefix: String,
44 ignore_attributes: bool,
45 text_node_name: String,
46 entities: HashMap<String, String>,
47}
48
49impl<'js> Trace<'js> for XMLParser<'js> {
50 fn trace<'a>(&self, tracer: Tracer<'a, 'js>) {
51 if let Some(tag_value_processor) = &self.tag_value_processor {
52 tracer.mark(tag_value_processor)
53 }
54 if let Some(attribute_value_processor) = &self.attribute_value_processor {
55 tracer.mark(attribute_value_processor)
56 }
57 }
58}
59
60struct StackObject<'js> {
61 obj: Object<'js>,
62 has_value: bool,
63}
64impl<'js> StackObject<'js> {
65 fn new(ctx: Ctx<'js>) -> Result<Self> {
66 Ok(Self {
67 obj: Object::new(ctx)?,
68 has_value: false,
69 })
70 }
71
72 fn into_value(self, ctx: &Ctx<'js>) -> Result<Value<'js>> {
73 if self.has_value {
74 return Ok(self.obj.into_value());
75 }
76 "".into_js(ctx)
77 }
78}
79
80#[rquickjs::methods(rename_all = "camelCase")]
81impl<'js> XMLParser<'js> {
82 #[qjs(constructor)]
83 pub fn new(_ctx: Ctx<'js>, options: Opt<Object<'js>>) -> Result<Self> {
84 let mut tag_value_processor = None;
85 let mut attribute_value_processor = None;
86 let mut attribute_name_prefix = String::from("@_");
87 let mut ignore_attributes = true;
88 let mut text_node_name = String::from("#text");
89 if let Some(options) = options.0 {
90 tag_value_processor = options.get_optional("tagValueProcessor")?;
91 attribute_value_processor = options.get_optional("attributeValueProcessor")?;
92 if let Some(prefix) = options.get_optional("attributeNamePrefix")? {
93 attribute_name_prefix = prefix;
94 }
95 if let Some(attributes_ignored) = options.get_optional("ignoreAttributes")? {
96 ignore_attributes = attributes_ignored
97 }
98 if let Some(name) = options.get_optional("textNodeName")? {
99 text_node_name = name
100 }
101 }
102
103 Ok(XMLParser {
104 tag_value_processor,
105 attribute_value_processor,
106 entities: HashMap::new(),
107 attribute_name_prefix,
108 ignore_attributes,
109 text_node_name,
110 })
111 }
112
113 pub fn add_entity(&mut self, key: String, value: String) {
114 self.entities.insert(key, value);
115 }
116
117 pub fn parse(&self, ctx: Ctx<'js>, xml: Value<'js>) -> Result<Object<'js>> {
118 let bytes = get_bytes(&ctx, xml)?;
119 let mut reader = Reader::from_reader(bytes.as_ref());
120 reader.config_mut().trim_text(true);
121
122 let mut current_obj = StackObject::new(ctx.clone())?;
123 current_obj.has_value = true;
124 let mut buf = Vec::new();
125 let mut current_key = String::new();
126 let mut current_value: Option<String> = None;
127 let mut path: Vec<(String, StackObject<'js>)> = vec![];
128 let mut has_attributes = false;
129
130 loop {
131 buf.clear();
132
133 match reader.read_event_into(&mut buf) {
134 Ok(Event::Empty(ref tag)) => {
135 current_key = Self::get_tag_name(&ctx, &reader, tag)?;
136
137 let mut obj = StackObject::new(ctx.clone())?;
138 self.process_attributes(&ctx, &reader, &path, tag, &mut obj, &mut false)?;
139 current_obj.has_value = true;
140
141 Self::process_end(&ctx, ¤t_obj, obj.into_value(&ctx)?, ¤t_key)?;
142 }
143 Ok(Event::Start(ref tag)) => {
144 has_attributes = false;
145 current_key = Self::get_tag_name(&ctx, &reader, tag)?;
146 path.push((current_key.clone(), current_obj));
147
148 let obj = StackObject::new(ctx.clone())?;
149 current_obj = obj;
150
151 self.process_attributes(
152 &ctx,
153 &reader,
154 &path,
155 tag,
156 &mut current_obj,
157 &mut has_attributes,
158 )?;
159 }
160 Ok(Event::End(_)) => {
161 let (parent_tag, mut parent_obj) = path.pop().unwrap();
162 parent_obj.has_value = true;
163 let value = if let Some(value) = current_value.take() {
164 value.into_js(&ctx)?
165 } else {
166 current_obj.into_value(&ctx)?
167 };
168
169 current_obj = parent_obj;
170
171 Self::process_end(&ctx, ¤t_obj, value, &parent_tag)?;
172 }
173 Ok(Event::CData(text)) => {
174 let text = text.escape().or_throw(&ctx)?;
175 let tag_value = String::from_utf8_lossy(text.as_ref()).to_string();
176 let tag_value =
177 self.process_tag_value(&path, ¤t_key, tag_value, has_attributes)?;
178 if has_attributes {
179 current_obj.has_value = true;
180 current_obj.obj.set(&self.text_node_name, tag_value)?;
181 } else {
182 current_value = Some(tag_value)
183 }
184 }
185 Ok(Event::Text(ref text)) => {
186 let tag_value = text
187 .unescape_with(|v| self.entities.get(v).map(|x| x.as_str()))
188 .or_throw(&ctx)?
189 .to_string();
190 let tag_value =
191 self.process_tag_value(&path, ¤t_key, tag_value, has_attributes)?;
192
193 if has_attributes {
194 current_obj.has_value = true;
195 current_obj.obj.set(&self.text_node_name, tag_value)?;
196 } else {
197 current_value = Some(tag_value)
198 }
199 }
200 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
201 Ok(Event::Eof) => break,
202 _ => {}
203 }
204 }
205 Ok(current_obj.obj)
206 }
207}
208
209impl<'js> XMLParser<'js> {
210 fn get_tag_name(
211 ctx: &Ctx<'js>,
212 reader: &Reader<&[u8]>,
213 tag: &BytesStart<'_>,
214 ) -> Result<String> {
215 let tag = tag.name();
216 let tag_name = reader.decoder().decode(tag.as_ref()).or_throw(ctx)?;
217
218 Ok(tag_name.to_string())
219 }
220
221 fn process_end(
222 ctx: &Ctx<'js>,
223 current_obj: &StackObject<'js>,
224 value: Value<'js>,
225 tag: &str,
226 ) -> Result<()> {
227 if current_obj.obj.contains_key(tag)? {
228 let parent_value: Value = current_obj.obj.get(tag)?;
229 if !parent_value.is_array() {
230 let array = Array::new(ctx.clone())?;
231 array.set(0, parent_value)?;
232 array.set(1, value)?;
233 current_obj.obj.set(tag, array.as_value())?;
234 } else {
235 let array = parent_value.as_array().or_throw(ctx)?;
236 array.set(array.len(), value)?;
237 current_obj.obj.set(tag, array.as_value())?;
238 }
239 } else {
240 current_obj.obj.prop(
241 tag,
242 Property::from(value).configurable().enumerable().writable(),
243 )?;
244 }
245 Ok(())
246 }
247
248 fn process_attributes(
249 &self,
250 ctx: &Ctx<'js>,
251 reader: &Reader<&[u8]>,
252 path: &[(String, StackObject<'js>)],
253 tag: &BytesStart<'_>,
254 stack_object: &mut StackObject<'js>,
255 has_attributes: &mut bool,
256 ) -> Result<()> {
257 if !self.ignore_attributes {
258 for attribute in tag.attributes() {
259 stack_object.has_value = true;
260 *has_attributes = true;
261 let attr = attribute.or_throw(ctx)?;
262
263 let key_slice = attr.key.as_ref();
264 let key = if !self.attribute_name_prefix.is_empty() {
265 let prefix_bytes = self.attribute_name_prefix.as_bytes();
266 let mut key_bytes = Vec::with_capacity(prefix_bytes.len() + key_slice.len());
267 key_bytes.extend_from_slice(prefix_bytes);
268 key_bytes.extend_from_slice(key_slice);
269
270 reader
271 .decoder()
272 .decode(&key_bytes)
273 .or_throw(ctx)?
274 .to_string()
275 } else {
276 reader
277 .decoder()
278 .decode(key_slice)
279 .or_throw(ctx)?
280 .to_string()
281 };
282
283 let mut value = reader
284 .decoder()
285 .decode(attr.value.as_ref())
286 .or_throw(ctx)?
287 .to_string();
288
289 if let Some(attribute_value_processor) = &self.attribute_value_processor {
290 let jpath: String = path.iter().join_to_string(".", |(k, _)| k);
291 if let Some(new_value) =
292 attribute_value_processor.call((key.clone(), value.clone(), jpath))?
293 {
294 value = new_value
295 }
296 }
297 stack_object.obj.set(key, value)?;
298 }
299 }
300 Ok(())
301 }
302
303 fn process_tag_value(
304 &self,
305 path: &[(String, StackObject<'js>)],
306 key: &String,
307 value: String,
308 has_attributes: bool,
309 ) -> Result<String> {
310 if value.is_empty() {
311 return Ok(value);
312 }
313
314 if let Some(tag_value_processor) = &self.tag_value_processor {
315 let jpath: String = path.iter().join_to_string(".", |(k, _)| k);
316 if let Some(new_value) =
317 tag_value_processor.call((key, value.clone(), jpath, has_attributes))?
318 {
319 return Ok(new_value);
320 }
321 }
322 Ok::<_, Error>(value)
323 }
324}
325
326#[derive(Debug, Clone)]
327#[rquickjs::class]
328struct XmlText {
329 value: String,
330}
331
332impl<'js> Trace<'js> for XmlText {
333 fn trace<'a>(&self, _tracer: Tracer<'a, 'js>) {}
334}
335
336#[rquickjs::methods(rename_all = "camelCase")]
337impl XmlText {
338 #[qjs(constructor)]
339 fn new(value: String) -> Self {
340 let mut escaped = String::with_capacity(value.len());
341 escape_element(&mut escaped, &value);
342 XmlText { value: escaped }
343 }
344
345 fn to_string(&self) -> String {
346 self.value.clone()
347 }
348}
349
350#[derive(Debug, Clone)]
351#[rquickjs::class]
352#[derive(rquickjs::class::Trace)]
353struct XmlNode<'js> {
354 #[qjs(skip_trace)]
355 name: String,
356 children: Vec<Value<'js>>,
358 #[qjs(skip_trace)]
359 attributes: Vec<(String, String)>,
361}
362
363enum NodeStackEntry<'js> {
364 Node(Class<'js, XmlNode<'js>>),
365 End(String),
366}
367
368#[rquickjs::methods(rename_all = "camelCase")]
369impl<'js> XmlNode<'js> {
370 #[qjs(constructor)]
371 fn new(name: String, children: Opt<Vec<Value<'js>>>) -> Result<Self> {
372 let node = XmlNode {
373 name,
374 attributes: Vec::new(),
375 children: children.0.unwrap_or_default(),
376 };
377
378 Ok(node)
379 }
380
381 #[qjs(static)]
382 fn of(
383 ctx: Ctx<'js>,
384 name: String,
385 child_text: Opt<String>,
386 with_name: Opt<String>,
387 ) -> Result<Value<'js>> {
388 let mut node = XmlNode {
389 name,
390 children: Vec::new(),
391 attributes: Vec::new(),
392 };
393
394 if let Some(text) = child_text.0 {
395 let xml_text = Class::instance(ctx.clone(), XmlText::new(text))?;
396 node.children.push(xml_text.into_value());
397 }
398
399 if let Some(new_name) = with_name.0 {
400 node.name = new_name;
401 }
402
403 node.into_js(&ctx)
404 }
405
406 fn with_name(this: This<Class<'js, Self>>, name: String) -> Class<'js, Self> {
407 this.borrow_mut().name = name;
408 this.0
409 }
410
411 fn add_attribute(
412 this: This<Class<'js, Self>>,
413 name: String,
414 value: String,
415 ) -> Class<'js, Self> {
416 let this2 = this.clone();
417 let mut borrow = this2.borrow_mut();
418 if let Some(pos) = borrow.attributes.iter().position(|(a, _)| a == &name) {
419 borrow.attributes[pos] = (name, value);
420 } else {
421 borrow.attributes.push((name, value));
422 }
423 this.0
424 }
425
426 fn add_child_node(this: This<Class<'js, Self>>, value: Value<'js>) -> Result<Class<'js, Self>> {
427 let this2 = this.clone();
428 this2.borrow_mut().children.push(value);
429 Ok(this.0)
430 }
431
432 fn remove_attribute(this: This<Class<'js, Self>>, name: String) -> Class<'js, Self> {
433 let this2 = this.clone();
434 let mut borrow = this2.borrow_mut();
435 if let Some(pos) = borrow.attributes.iter().position(|(a, _)| a == &name) {
436 borrow.attributes.remove(pos);
437 }
438 this.0
439 }
440
441 fn to_string(this: This<Class<'js, Self>>, ctx: Ctx<'js>) -> Result<String> {
442 let class = this.0;
443 let mut xml_text = String::with_capacity(8);
444
445 let mut stack = vec![NodeStackEntry::Node(class)];
446
447 while let Some(node) = stack.pop() {
448 match node {
449 NodeStackEntry::Node(node) => {
450 let borrow = node.borrow();
451 xml_text.push('<');
452 xml_text.push_str(&borrow.name);
453
454 for (attribute_name, attribute) in &borrow.attributes {
455 xml_text.push(' ');
456 xml_text.push_str(attribute_name);
457 xml_text.push_str("=\"");
458 escape_attribute(&mut xml_text, attribute);
459 xml_text.push('"');
460 }
461
462 let has_children = !borrow.children.is_empty();
463 if has_children {
464 stack.push(NodeStackEntry::End(borrow.name.clone()));
465 xml_text.push('>');
466
467 for child in borrow.children.iter().rev() {
469 if let Some(obj) = child.as_object() {
470 if let Some(node) = Class::<Self>::from_object(&obj.clone()) {
471 stack.push(NodeStackEntry::Node(node))
472 } else if let Some(text) =
473 Class::<XmlText>::from_object(&obj.clone())
474 {
475 xml_text.push_str(&text.borrow().value);
476 } else {
477 let to_string_fn = obj.get::<_, Function>("toString")?;
478 let string_value: String = to_string_fn.call(())?;
479 xml_text.push_str(&string_value);
480 }
481 } else {
482 let string_value: String = child
483 .clone()
484 .try_into_string()
485 .map_err(|err| format!("Unable to convert {:?} to string", err))
486 .or_throw(&ctx)?
487 .to_string()?;
488 xml_text.push_str(&string_value);
489 }
490 }
491 } else {
492 xml_text.push_str("/>");
493 }
494 drop(borrow);
495 }
496 NodeStackEntry::End(name) => {
497 xml_text.push_str("</");
498 xml_text.push_str(&name);
499 xml_text.push('>');
500 }
501 }
502 }
503
504 Ok(xml_text)
505 }
506}
507
508fn escape_attribute(text: &mut String, value: &str) {
509 for c in value.chars() {
510 match c {
511 '&' => text.push_str(AMP),
512 '<' => text.push_str(LT),
513 '>' => text.push_str(GT),
514 '"' => text.push_str(QUOT),
515 _ => text.push(c),
516 }
517 }
518}
519
520fn escape_element(text: &mut String, value: &str) {
521 for c in value.chars() {
522 match c {
523 '&' => text.push_str(AMP),
524 '<' => text.push_str(LT),
525 '>' => text.push_str(GT),
526 '\'' => text.push_str(APOS),
527 '"' => text.push_str(QUOT),
528 '\r' => text.push_str(CR),
529 '\n' => text.push_str(LF),
530 '\u{0085}' => text.push_str(NEL),
531 '\u{2028}' => text.push_str(LS),
532 _ => text.push(c),
533 }
534 }
535}
536
537pub struct XmlModule;
538
539impl ModuleDef for XmlModule {
540 fn declare(declare: &Declarations<'_>) -> Result<()> {
541 declare.declare(stringify!(XMLParser))?;
542 declare.declare(stringify!(XmlText))?;
543 declare.declare(stringify!(XmlNode))?;
544
545 declare.declare("default")?;
546
547 Ok(())
548 }
549
550 fn evaluate<'js>(ctx: &Ctx<'js>, exports: &Exports<'js>) -> Result<()> {
551 export_default(ctx, exports, |default| {
552 Class::<XMLParser>::define(default)?;
553 Class::<XmlText>::define(default)?;
554 Class::<XmlNode>::define(default)?;
555 Ok(())
556 })?;
557
558 Ok(())
559 }
560}
561
562