rqjs_ext/modules/
xml.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3use std::collections::HashMap;
4
5use quick_xml::{
6    events::{BytesStart, Event},
7    Reader,
8};
9
10use rquickjs::{
11    class::{Trace, Tracer},
12    function::Opt,
13    module::{Declarations, Exports, ModuleDef},
14    object::Property,
15    prelude::This,
16    Array, Class, Ctx, Error, Function, IntoJs, Object, Result, Value,
17};
18
// XML character/entity references emitted by `escape_attribute` and
// `escape_element`. Each constant is the escaped replacement for the character
// named by the constant, not the raw character itself.
const AMP: &str = "&amp;";
const LT: &str = "&lt;";
const GT: &str = "&gt;";
const QUOT: &str = "&quot;";
const APOS: &str = "&apos;";
const CR: &str = "&#x0D;";
const LF: &str = "&#x0A;";
const NEL: &str = "&#x85;";
const LS: &str = "&#x2028;";
28
29use crate::{
30    // module_builder::ModuleInfo,
31    modules::module::export_default,
32    utils::{
33        object::{get_bytes, ObjectExt},
34        result::ResultExt,
35        string::JoinToString,
36    },
37};
38
39#[rquickjs::class]
40struct XMLParser<'js> {
41    tag_value_processor: Option<Function<'js>>,
42    attribute_value_processor: Option<Function<'js>>,
43    attribute_name_prefix: String,
44    ignore_attributes: bool,
45    text_node_name: String,
46    entities: HashMap<String, String>,
47}
48
49impl<'js> Trace<'js> for XMLParser<'js> {
50    fn trace<'a>(&self, tracer: Tracer<'a, 'js>) {
51        if let Some(tag_value_processor) = &self.tag_value_processor {
52            tracer.mark(tag_value_processor)
53        }
54        if let Some(attribute_value_processor) = &self.attribute_value_processor {
55            tracer.mark(attribute_value_processor)
56        }
57    }
58}
59
60struct StackObject<'js> {
61    obj: Object<'js>,
62    has_value: bool,
63}
64impl<'js> StackObject<'js> {
65    fn new(ctx: Ctx<'js>) -> Result<Self> {
66        Ok(Self {
67            obj: Object::new(ctx)?,
68            has_value: false,
69        })
70    }
71
72    fn into_value(self, ctx: &Ctx<'js>) -> Result<Value<'js>> {
73        if self.has_value {
74            return Ok(self.obj.into_value());
75        }
76        "".into_js(ctx)
77    }
78}
79
80#[rquickjs::methods(rename_all = "camelCase")]
81impl<'js> XMLParser<'js> {
82    #[qjs(constructor)]
83    pub fn new(_ctx: Ctx<'js>, options: Opt<Object<'js>>) -> Result<Self> {
84        let mut tag_value_processor = None;
85        let mut attribute_value_processor = None;
86        let mut attribute_name_prefix = String::from("@_");
87        let mut ignore_attributes = true;
88        let mut text_node_name = String::from("#text");
89        if let Some(options) = options.0 {
90            tag_value_processor = options.get_optional("tagValueProcessor")?;
91            attribute_value_processor = options.get_optional("attributeValueProcessor")?;
92            if let Some(prefix) = options.get_optional("attributeNamePrefix")? {
93                attribute_name_prefix = prefix;
94            }
95            if let Some(attributes_ignored) = options.get_optional("ignoreAttributes")? {
96                ignore_attributes = attributes_ignored
97            }
98            if let Some(name) = options.get_optional("textNodeName")? {
99                text_node_name = name
100            }
101        }
102
103        Ok(XMLParser {
104            tag_value_processor,
105            attribute_value_processor,
106            entities: HashMap::new(),
107            attribute_name_prefix,
108            ignore_attributes,
109            text_node_name,
110        })
111    }
112
113    pub fn add_entity(&mut self, key: String, value: String) {
114        self.entities.insert(key, value);
115    }
116
117    pub fn parse(&self, ctx: Ctx<'js>, xml: Value<'js>) -> Result<Object<'js>> {
118        let bytes = get_bytes(&ctx, xml)?;
119        let mut reader = Reader::from_reader(bytes.as_ref());
120        reader.config_mut().trim_text(true);
121
122        let mut current_obj = StackObject::new(ctx.clone())?;
123        current_obj.has_value = true;
124        let mut buf = Vec::new();
125        let mut current_key = String::new();
126        let mut current_value: Option<String> = None;
127        let mut path: Vec<(String, StackObject<'js>)> = vec![];
128        let mut has_attributes = false;
129
130        loop {
131            buf.clear();
132
133            match reader.read_event_into(&mut buf) {
134                Ok(Event::Empty(ref tag)) => {
135                    current_key = Self::get_tag_name(&ctx, &reader, tag)?;
136
137                    let mut obj = StackObject::new(ctx.clone())?;
138                    self.process_attributes(&ctx, &reader, &path, tag, &mut obj, &mut false)?;
139                    current_obj.has_value = true;
140
141                    Self::process_end(&ctx, &current_obj, obj.into_value(&ctx)?, &current_key)?;
142                }
143                Ok(Event::Start(ref tag)) => {
144                    has_attributes = false;
145                    current_key = Self::get_tag_name(&ctx, &reader, tag)?;
146                    path.push((current_key.clone(), current_obj));
147
148                    let obj = StackObject::new(ctx.clone())?;
149                    current_obj = obj;
150
151                    self.process_attributes(
152                        &ctx,
153                        &reader,
154                        &path,
155                        tag,
156                        &mut current_obj,
157                        &mut has_attributes,
158                    )?;
159                }
160                Ok(Event::End(_)) => {
161                    let (parent_tag, mut parent_obj) = path.pop().unwrap();
162                    parent_obj.has_value = true;
163                    let value = if let Some(value) = current_value.take() {
164                        value.into_js(&ctx)?
165                    } else {
166                        current_obj.into_value(&ctx)?
167                    };
168
169                    current_obj = parent_obj;
170
171                    Self::process_end(&ctx, &current_obj, value, &parent_tag)?;
172                }
173                Ok(Event::CData(text)) => {
174                    let text = text.escape().or_throw(&ctx)?;
175                    let tag_value = String::from_utf8_lossy(text.as_ref()).to_string();
176                    let tag_value =
177                        self.process_tag_value(&path, &current_key, tag_value, has_attributes)?;
178                    if has_attributes {
179                        current_obj.has_value = true;
180                        current_obj.obj.set(&self.text_node_name, tag_value)?;
181                    } else {
182                        current_value = Some(tag_value)
183                    }
184                }
185                Ok(Event::Text(ref text)) => {
186                    let tag_value = text
187                        .unescape_with(|v| self.entities.get(v).map(|x| x.as_str()))
188                        .or_throw(&ctx)?
189                        .to_string();
190                    let tag_value =
191                        self.process_tag_value(&path, &current_key, tag_value, has_attributes)?;
192
193                    if has_attributes {
194                        current_obj.has_value = true;
195                        current_obj.obj.set(&self.text_node_name, tag_value)?;
196                    } else {
197                        current_value = Some(tag_value)
198                    }
199                }
200                Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
201                Ok(Event::Eof) => break,
202                _ => {}
203            }
204        }
205        Ok(current_obj.obj)
206    }
207}
208
209impl<'js> XMLParser<'js> {
210    fn get_tag_name(
211        ctx: &Ctx<'js>,
212        reader: &Reader<&[u8]>,
213        tag: &BytesStart<'_>,
214    ) -> Result<String> {
215        let tag = tag.name();
216        let tag_name = reader.decoder().decode(tag.as_ref()).or_throw(ctx)?;
217
218        Ok(tag_name.to_string())
219    }
220
221    fn process_end(
222        ctx: &Ctx<'js>,
223        current_obj: &StackObject<'js>,
224        value: Value<'js>,
225        tag: &str,
226    ) -> Result<()> {
227        if current_obj.obj.contains_key(tag)? {
228            let parent_value: Value = current_obj.obj.get(tag)?;
229            if !parent_value.is_array() {
230                let array = Array::new(ctx.clone())?;
231                array.set(0, parent_value)?;
232                array.set(1, value)?;
233                current_obj.obj.set(tag, array.as_value())?;
234            } else {
235                let array = parent_value.as_array().or_throw(ctx)?;
236                array.set(array.len(), value)?;
237                current_obj.obj.set(tag, array.as_value())?;
238            }
239        } else {
240            current_obj.obj.prop(
241                tag,
242                Property::from(value).configurable().enumerable().writable(),
243            )?;
244        }
245        Ok(())
246    }
247
248    fn process_attributes(
249        &self,
250        ctx: &Ctx<'js>,
251        reader: &Reader<&[u8]>,
252        path: &[(String, StackObject<'js>)],
253        tag: &BytesStart<'_>,
254        stack_object: &mut StackObject<'js>,
255        has_attributes: &mut bool,
256    ) -> Result<()> {
257        if !self.ignore_attributes {
258            for attribute in tag.attributes() {
259                stack_object.has_value = true;
260                *has_attributes = true;
261                let attr = attribute.or_throw(ctx)?;
262
263                let key_slice = attr.key.as_ref();
264                let key = if !self.attribute_name_prefix.is_empty() {
265                    let prefix_bytes = self.attribute_name_prefix.as_bytes();
266                    let mut key_bytes = Vec::with_capacity(prefix_bytes.len() + key_slice.len());
267                    key_bytes.extend_from_slice(prefix_bytes);
268                    key_bytes.extend_from_slice(key_slice);
269
270                    reader
271                        .decoder()
272                        .decode(&key_bytes)
273                        .or_throw(ctx)?
274                        .to_string()
275                } else {
276                    reader
277                        .decoder()
278                        .decode(key_slice)
279                        .or_throw(ctx)?
280                        .to_string()
281                };
282
283                let mut value = reader
284                    .decoder()
285                    .decode(attr.value.as_ref())
286                    .or_throw(ctx)?
287                    .to_string();
288
289                if let Some(attribute_value_processor) = &self.attribute_value_processor {
290                    let jpath: String = path.iter().join_to_string(".", |(k, _)| k);
291                    if let Some(new_value) =
292                        attribute_value_processor.call((key.clone(), value.clone(), jpath))?
293                    {
294                        value = new_value
295                    }
296                }
297                stack_object.obj.set(key, value)?;
298            }
299        }
300        Ok(())
301    }
302
303    fn process_tag_value(
304        &self,
305        path: &[(String, StackObject<'js>)],
306        key: &String,
307        value: String,
308        has_attributes: bool,
309    ) -> Result<String> {
310        if value.is_empty() {
311            return Ok(value);
312        }
313
314        if let Some(tag_value_processor) = &self.tag_value_processor {
315            let jpath: String = path.iter().join_to_string(".", |(k, _)| k);
316            if let Some(new_value) =
317                tag_value_processor.call((key, value.clone(), jpath, has_attributes))?
318            {
319                return Ok(new_value);
320            }
321        }
322        Ok::<_, Error>(value)
323    }
324}
325
326#[derive(Debug, Clone)]
327#[rquickjs::class]
328struct XmlText {
329    value: String,
330}
331
332impl<'js> Trace<'js> for XmlText {
333    fn trace<'a>(&self, _tracer: Tracer<'a, 'js>) {}
334}
335
336#[rquickjs::methods(rename_all = "camelCase")]
337impl XmlText {
338    #[qjs(constructor)]
339    fn new(value: String) -> Self {
340        let mut escaped = String::with_capacity(value.len());
341        escape_element(&mut escaped, &value);
342        XmlText { value: escaped }
343    }
344
345    fn to_string(&self) -> String {
346        self.value.clone()
347    }
348}
349
350#[derive(Debug, Clone)]
351#[rquickjs::class]
352#[derive(rquickjs::class::Trace)]
353struct XmlNode<'js> {
354    #[qjs(skip_trace)]
355    name: String,
356    //child and attributes are always set to avoid branch checks when adding/removing values
357    children: Vec<Value<'js>>,
358    #[qjs(skip_trace)]
359    //vec iteration is faster since we rarely have more than 10 attrs and we want to retain insertion order
360    attributes: Vec<(String, String)>,
361}
362
363enum NodeStackEntry<'js> {
364    Node(Class<'js, XmlNode<'js>>),
365    End(String),
366}
367
368#[rquickjs::methods(rename_all = "camelCase")]
369impl<'js> XmlNode<'js> {
370    #[qjs(constructor)]
371    fn new(name: String, children: Opt<Vec<Value<'js>>>) -> Result<Self> {
372        let node = XmlNode {
373            name,
374            attributes: Vec::new(),
375            children: children.0.unwrap_or_default(),
376        };
377
378        Ok(node)
379    }
380
381    #[qjs(static)]
382    fn of(
383        ctx: Ctx<'js>,
384        name: String,
385        child_text: Opt<String>,
386        with_name: Opt<String>,
387    ) -> Result<Value<'js>> {
388        let mut node = XmlNode {
389            name,
390            children: Vec::new(),
391            attributes: Vec::new(),
392        };
393
394        if let Some(text) = child_text.0 {
395            let xml_text = Class::instance(ctx.clone(), XmlText::new(text))?;
396            node.children.push(xml_text.into_value());
397        }
398
399        if let Some(new_name) = with_name.0 {
400            node.name = new_name;
401        }
402
403        node.into_js(&ctx)
404    }
405
406    fn with_name(this: This<Class<'js, Self>>, name: String) -> Class<'js, Self> {
407        this.borrow_mut().name = name;
408        this.0
409    }
410
411    fn add_attribute(
412        this: This<Class<'js, Self>>,
413        name: String,
414        value: String,
415    ) -> Class<'js, Self> {
416        let this2 = this.clone();
417        let mut borrow = this2.borrow_mut();
418        if let Some(pos) = borrow.attributes.iter().position(|(a, _)| a == &name) {
419            borrow.attributes[pos] = (name, value);
420        } else {
421            borrow.attributes.push((name, value));
422        }
423        this.0
424    }
425
426    fn add_child_node(this: This<Class<'js, Self>>, value: Value<'js>) -> Result<Class<'js, Self>> {
427        let this2 = this.clone();
428        this2.borrow_mut().children.push(value);
429        Ok(this.0)
430    }
431
432    fn remove_attribute(this: This<Class<'js, Self>>, name: String) -> Class<'js, Self> {
433        let this2 = this.clone();
434        let mut borrow = this2.borrow_mut();
435        if let Some(pos) = borrow.attributes.iter().position(|(a, _)| a == &name) {
436            borrow.attributes.remove(pos);
437        }
438        this.0
439    }
440
441    fn to_string(this: This<Class<'js, Self>>, ctx: Ctx<'js>) -> Result<String> {
442        let class = this.0;
443        let mut xml_text = String::with_capacity(8);
444
445        let mut stack = vec![NodeStackEntry::Node(class)];
446
447        while let Some(node) = stack.pop() {
448            match node {
449                NodeStackEntry::Node(node) => {
450                    let borrow = node.borrow();
451                    xml_text.push('<');
452                    xml_text.push_str(&borrow.name);
453
454                    for (attribute_name, attribute) in &borrow.attributes {
455                        xml_text.push(' ');
456                        xml_text.push_str(attribute_name);
457                        xml_text.push_str("=\"");
458                        escape_attribute(&mut xml_text, attribute);
459                        xml_text.push('"');
460                    }
461
462                    let has_children = !borrow.children.is_empty();
463                    if has_children {
464                        stack.push(NodeStackEntry::End(borrow.name.clone()));
465                        xml_text.push('>');
466
467                        // Add children to the stack in reverse order (to maintain original order)
468                        for child in borrow.children.iter().rev() {
469                            if let Some(obj) = child.as_object() {
470                                if let Some(node) = Class::<Self>::from_object(&obj.clone()) {
471                                    stack.push(NodeStackEntry::Node(node))
472                                } else if let Some(text) =
473                                    Class::<XmlText>::from_object(&obj.clone())
474                                {
475                                    xml_text.push_str(&text.borrow().value);
476                                } else {
477                                    let to_string_fn = obj.get::<_, Function>("toString")?;
478                                    let string_value: String = to_string_fn.call(())?;
479                                    xml_text.push_str(&string_value);
480                                }
481                            } else {
482                                let string_value: String = child
483                                    .clone()
484                                    .try_into_string()
485                                    .map_err(|err| format!("Unable to convert {:?} to string", err))
486                                    .or_throw(&ctx)?
487                                    .to_string()?;
488                                xml_text.push_str(&string_value);
489                            }
490                        }
491                    } else {
492                        xml_text.push_str("/>");
493                    }
494                    drop(borrow);
495                }
496                NodeStackEntry::End(name) => {
497                    xml_text.push_str("</");
498                    xml_text.push_str(&name);
499                    xml_text.push('>');
500                }
501            }
502        }
503
504        Ok(xml_text)
505    }
506}
507
508fn escape_attribute(text: &mut String, value: &str) {
509    for c in value.chars() {
510        match c {
511            '&' => text.push_str(AMP),
512            '<' => text.push_str(LT),
513            '>' => text.push_str(GT),
514            '"' => text.push_str(QUOT),
515            _ => text.push(c),
516        }
517    }
518}
519
520fn escape_element(text: &mut String, value: &str) {
521    for c in value.chars() {
522        match c {
523            '&' => text.push_str(AMP),
524            '<' => text.push_str(LT),
525            '>' => text.push_str(GT),
526            '\'' => text.push_str(APOS),
527            '"' => text.push_str(QUOT),
528            '\r' => text.push_str(CR),
529            '\n' => text.push_str(LF),
530            '\u{0085}' => text.push_str(NEL),
531            '\u{2028}' => text.push_str(LS),
532            _ => text.push(c),
533        }
534    }
535}
536
537pub struct XmlModule;
538
539impl ModuleDef for XmlModule {
540    fn declare(declare: &Declarations<'_>) -> Result<()> {
541        declare.declare(stringify!(XMLParser))?;
542        declare.declare(stringify!(XmlText))?;
543        declare.declare(stringify!(XmlNode))?;
544
545        declare.declare("default")?;
546
547        Ok(())
548    }
549
550    fn evaluate<'js>(ctx: &Ctx<'js>, exports: &Exports<'js>) -> Result<()> {
551        export_default(ctx, exports, |default| {
552            Class::<XMLParser>::define(default)?;
553            Class::<XmlText>::define(default)?;
554            Class::<XmlNode>::define(default)?;
555            Ok(())
556        })?;
557
558        Ok(())
559    }
560}
561
562// impl From<XmlModule> for ModuleInfo<XmlModule> {
563//     fn from(val: XmlModule) -> Self {
564//         ModuleInfo {
565//             name: "xml",
566//             module: val,
567//         }
568//     }
569// }