serde_llsd/de/
xml.rs

1//
2//  de/xml.rs -- XML deserializer for LLSD
3//
4//  Library for serializing and de-serializing data in
5//  Linden Lab Structured Data format.
6//
7//  Format documentation is at http://wiki.secondlife.com/wiki/LLSD
8//
9//  XML format.
10//
11//  Animats
12//  February, 2021.
13//  License: LGPL.
14//
15use crate::LLSDValue;
16use anyhow::{anyhow, Error};
17use ascii85;
18use base64;
19use base64::Engine;
20use quick_xml::events::attributes::Attributes;
21use quick_xml::events::Event;
22use quick_xml::Reader;
23use std::collections::HashMap;
24use std::io::{BufRead, BufReader};
25////use uuid;
26//
27//  Constants
28//
/// Opening text of an LLSD XML document: the XML declaration line followed by
/// the opening `<llsd>` tag, each terminated by a newline.
// NOTE(review): not referenced in this file; presumably used by the XML serializer -- confirm.
pub const LLSDXMLPREFIX: &str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<llsd>\n";
/// Leading text that XML-format LLSD must begin with.
// NOTE(review): presumably used by callers to recognise the XML format -- confirm.
pub const LLSDXMLSENTINEL: &str = "<?xml"; // Must begin with this.
31///    Parse LLSD expressed in XML into an LLSD tree.
32pub fn from_str(xmlstr: &str) -> Result<LLSDValue, Error> {
33    from_reader(&mut BufReader::new(xmlstr.as_bytes()))
34}
35////let mut reader = Reader::from_str(xmlstr);
36
37/// Read XML from buffered source and parse into LLSDValue.
38pub fn from_reader<R: BufRead>(rdr: &mut R) -> Result<LLSDValue, Error> {
39    let mut reader = Reader::from_reader(rdr); // create an XML reader from a sequential reader
40    reader.trim_text(true); // do not want trailing blanks
41    reader.expand_empty_elements(true); // want end tag events always
42    let mut buf = Vec::new(); // reader work area
43    let mut output: Option<LLSDValue> = None;
44    //  Outer parse. Find <llsd> and parse its interior.
45    loop {
46        match reader.read_event(&mut buf) {
47            Ok(Event::Start(ref e)) => {
48                match e.name() {
49                    b"llsd" => {
50                        if output.is_some() {
51                            return Err(anyhow!("More than one <llsd> block in data"));
52                        }
53                        let mut buf2 = Vec::new();
54                        match reader.read_event(&mut buf2) {
55                            Ok(Event::Start(ref e)) => {
56                                let tagname = std::str::from_utf8(e.name())?; // tag name as string to start parse
57                                                                              //  This does all the real work.
58                                output = Some(parse_value(&mut reader, tagname, &e.attributes())?);
59                            }
60                            _ => {
61                                return Err(anyhow!(
62                                    "Expected LLSD data, found {:?} error at position {}",
63                                    e.name(),
64                                    reader.buffer_position()
65                                ))
66                            }
67                        };
68                    }
69                    _ => {
70                        return Err(anyhow!(
71                            "Expected <llsd>, found {:?} error at position {}",
72                            e.name(),
73                            reader.buffer_position()
74                        ))
75                    }
76                }
77            }
78            Ok(Event::Text(_e)) => (), // Don't actually need random text
79            Ok(Event::End(ref _e)) => (), // Tag matching check is automatic.
80            Ok(Event::Eof) => break,   // exits the loop when reaching end of file
81            Err(e) => {
82                return Err(anyhow!(
83                    "Error at position {}: {:?}",
84                    reader.buffer_position(),
85                    e
86                ))
87            }
88            _ => (), // There are several other `Event`s we do not consider here
89        }
90
91        // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
92        buf.clear()
93    }
94    //  Final result, if stored
95    match output {
96        Some(out) => Ok(out),
97        None => Err(anyhow!("Unexpected end of data, no <llsd> block.")),
98    }
99}
100
101/// Parse one value - real, integer, map, etc. Recursive.
102////fn parse_value<R: Read+BufRead>(rdr: &mut R) -> Result<LLSDValue, Error> {
103fn parse_value<R: BufRead>(
104    reader: &mut Reader<&mut R>,
105    starttag: &str,
106    attrs: &Attributes,
107) -> Result<LLSDValue, Error> {
108    //  Entered with a start tag alread parsed and in starttag
109    match starttag {
110        "undef" | "real" | "integer" | "boolean" | "string" | "uri" | "binary" | "uuid"
111        | "date" => parse_primitive_value(reader, starttag, attrs),
112        "map" => parse_map(reader),
113        "array" => parse_array(reader),
114        _ => Err(anyhow!(
115            "Unknown data type <{}> at position {}",
116            starttag,
117            reader.buffer_position()
118        )),
119    }
120}
121
122/// Parse one value - real, integer, map, etc. Recursive.
123fn parse_primitive_value<R: BufRead>(
124    reader: &mut Reader<&mut R>,
125    starttag: &str,
126    attrs: &Attributes,
127) -> Result<LLSDValue, Error> {
128    //  Entered with a start tag already parsed and in starttag
129    let mut texts = Vec::new(); // accumulate text here
130    let mut buf = Vec::new();
131    loop {
132        let event = reader.read_event(&mut buf);
133        match event {
134            Ok(Event::Text(e)) => texts.push(e.unescape_and_decode(reader)?),
135            Ok(Event::End(ref e)) => {
136                let tagname = std::str::from_utf8(e.name())?; // tag name as string
137                if starttag != tagname {
138                    return Err(anyhow!(
139                        "Unmatched XML tags: <{}> .. <{}>",
140                        starttag,
141                        tagname
142                    ));
143                };
144                //  End of an XML tag. Value is in text.
145                let text = texts.join(" ").trim().to_string(); // combine into one big string
146                texts.clear();
147                //  Parse the primitive types.
148                return match starttag {
149                    "undef" => Ok(LLSDValue::Undefined),
150                    "real" => Ok(LLSDValue::Real(
151                        if text.to_lowercase() == "nan" {
152                            "NaN".to_string()
153                        } else {
154                            text
155                        }
156                        .parse::<f64>()?,
157                    )),
158                    "integer" => Ok(LLSDValue::Integer(parse_integer(&text)?)),
159                    "boolean" => Ok(LLSDValue::Boolean(parse_boolean(&text)?)),
160                    "string" => Ok(LLSDValue::String(text)),
161                    "uri" => Ok(LLSDValue::String(text)),
162                    "uuid" => Ok(LLSDValue::UUID(if text.is_empty() {
163                        uuid::Uuid::nil()
164                    } else {
165                        uuid::Uuid::parse_str(&text)?
166                    })),
167                    "date" => Ok(LLSDValue::Date(parse_date(&text)?)),
168                    "binary" => Ok(LLSDValue::Binary(parse_binary(&text, attrs)?)),
169                    _ => Err(anyhow!(
170                        "Unexpected primitive data type <{}> at position {}",
171                        starttag,
172                        reader.buffer_position()
173                    )),
174                };
175                // unreachable
176            }
177            Ok(Event::Eof) => {
178                return Err(anyhow!(
179                    "Unexpected end of data in primitive value at position {}",
180                    reader.buffer_position()
181                ))
182            }
183            Ok(Event::Comment(_)) => {} // ignore comment
184            Err(e) => {
185                return Err(anyhow!(
186                    "Parse Error at position {}: {:?}",
187                    reader.buffer_position(),
188                    e
189                ))
190            }
191            _ => {
192                return Err(anyhow!(
193                    "Unexpected parse event {:?} at position {} while parsing: {:?}",
194                    event,
195                    reader.buffer_position(),
196                    starttag
197                ))
198            }
199        }
200    }
201}
202
203//  Parse one map.
204fn parse_map<R: BufRead>(reader: &mut Reader<&mut R>) -> Result<LLSDValue, Error> {
205    //  Entered with a "map" start tag just parsed.
206    let mut map: HashMap<String, LLSDValue> = HashMap::new(); // accumulating map
207    let mut texts = Vec::new(); // accumulate text here
208    let mut buf = Vec::new();
209    loop {
210        let event = reader.read_event(&mut buf);
211        match event {
212            Ok(Event::Start(ref e)) => {
213                let tagname = std::str::from_utf8(e.name())?; // tag name as string
214                match tagname {
215                    "key" => {
216                        let (k, v) = parse_map_entry(reader)?; // read one key/value pair
217                        let _dup = map.insert(k, v); // insert into map
218                                                     //  Duplicates are not errors, per LLSD spec.
219                    }
220                    _ => {
221                        return Err(anyhow!("Expected 'key' in map, found '{}'", tagname));
222                    }
223                }
224            }
225            Ok(Event::Text(e)) => texts.push(e.unescape_and_decode(reader)?),
226            Ok(Event::End(ref e)) => {
227                //  End of an XML tag. No text expected.
228                let tagname = std::str::from_utf8(e.name())?; // tag name as string
229                if "map" != tagname {
230                    return Err(anyhow!("Unmatched XML tags: <{}> .. <{}>", "map", tagname));
231                };
232                return Ok(LLSDValue::Map(map)); // done, valid result
233            }
234            Ok(Event::Eof) => {
235                return Err(anyhow!(
236                    "Unexpected end of data in map at position {}",
237                    reader.buffer_position()
238                ))
239            }
240            Ok(Event::Comment(_)) => {} // ignore comment
241            Err(e) => {
242                return Err(anyhow!(
243                    "Parse Error at position {}: {:?}",
244                    reader.buffer_position(),
245                    e
246                ))
247            }
248            _ => {
249                return Err(anyhow!(
250                    "Unexpected parse event {:?} at position {} while parsing map",
251                    event,
252                    reader.buffer_position(),
253                ))
254            }
255        }
256    }
257}
258
259//  Parse one map entry.
260//  Format <key> STRING </key> LLSDVALUE
261fn parse_map_entry<R: BufRead>(reader: &mut Reader<&mut R>) -> Result<(String, LLSDValue), Error> {
262    //  Entered with a "key" start tag just parsed.  Expecting text.
263    let mut texts = Vec::new(); // accumulate text here
264    let mut buf = Vec::new();
265    loop {
266        let event = reader.read_event(&mut buf);
267        match event {
268            Ok(Event::Start(ref e)) => {
269                let tagname = std::str::from_utf8(e.name())?; // tag name as string
270                return Err(anyhow!("Expected 'key' in map, found '{}'", tagname));
271            }
272            Ok(Event::Text(e)) => texts.push(e.unescape_and_decode(reader)?),
273            Ok(Event::End(ref e)) => {
274                //  End of an XML tag. Should be </key>
275                let tagname = std::str::from_utf8(e.name())?; // tag name as string
276                if "key" != tagname {
277                    return Err(anyhow!("Unmatched XML tags: <{}> .. <{}>", "key", tagname));
278                };
279                let mut buf = Vec::new();
280                let k = texts.join(" ").trim().to_string(); // the key
281                texts.clear();
282                match reader.read_event(&mut buf) {
283                    Ok(Event::Start(ref e)) => {
284                        let tagname = std::str::from_utf8(e.name())?; // tag name as string
285                        let v = parse_value(reader, tagname, &e.attributes())?; // parse next value
286                        return Ok((k, v)); // return key value pair
287                    }
288                    _ => {
289                        return Err(anyhow!(
290                            "Unexpected parse error at position {} while parsing map entry",
291                            reader.buffer_position()
292                        ))
293                    }
294                };
295            }
296            Ok(Event::Eof) => {
297                return Err(anyhow!(
298                    "Unexpected end of data at position {}",
299                    reader.buffer_position()
300                ))
301            }
302            Ok(Event::Comment(_)) => {} // ignore comment
303            Err(e) => {
304                return Err(anyhow!(
305                    "Parse Error at position {}: {:?}",
306                    reader.buffer_position(),
307                    e
308                ))
309            }
310            _ => {
311                return Err(anyhow!(
312                    "Unexpected parse event {:?} at position {} while parsing map entry",
313                    event,
314                    reader.buffer_position(),
315                ))
316            }
317        }
318    }
319}
320
321/// Parse one LLSD object. Recursive.
322fn parse_array<R: BufRead>(reader: &mut Reader<&mut R>) -> Result<LLSDValue, Error> {
323    //  Entered with an <array> tag just parsed.
324    let mut texts = Vec::new(); // accumulate text here
325    let mut buf = Vec::new();
326    let mut items: Vec<LLSDValue> = Vec::new(); // accumulate items.
327    loop {
328        let event = reader.read_event(&mut buf);
329        match event {
330            Ok(Event::Start(ref e)) => {
331                let tagname = std::str::from_utf8(e.name())?; // tag name as string
332                                                              //  Parse one data item.
333                items.push(parse_value(reader, tagname, &e.attributes())?);
334            }
335            Ok(Event::Text(e)) => texts.push(e.unescape_and_decode(reader)?),
336            Ok(Event::End(ref e)) => {
337                //  End of an XML tag. Should be </array>
338                let tagname = std::str::from_utf8(e.name())?; // tag name as string
339                if "array" != tagname {
340                    return Err(anyhow!(
341                        "Unmatched XML tags: <{}> .. <{}>",
342                        "array",
343                        tagname
344                    ));
345                };
346                break; // end of array
347            }
348            Ok(Event::Eof) => {
349                return Err(anyhow!(
350                    "Unexpected end of data at position {}",
351                    reader.buffer_position()
352                ))
353            }
354            Ok(Event::Comment(_)) => {} // ignore comment
355            Err(e) => {
356                return Err(anyhow!(
357                    "Parse Error at position {}: {:?}",
358                    reader.buffer_position(),
359                    e
360                ))
361            }
362            _ => {
363                return Err(anyhow!(
364                    "Unexpected parse event {:?} at position {} while parsing array",
365                    event,
366                    reader.buffer_position(),
367                ))
368            }
369        }
370    }
371    Ok(LLSDValue::Array(items)) // result is array of items
372}
373
374/// Parse binary object.
375/// Input in base64, base16, or base85.
376fn parse_binary(s: &str, attrs: &Attributes) -> Result<Vec<u8>, Error> {
377    // "Parsers must support base64 encoding. Parsers may support base16 and base85."
378    let encoding = match get_attr(attrs, b"encoding")? {
379        Some(enc) => enc,
380        None => "base64".to_string(), // default
381    };
382    //  Decode appropriately.
383    Ok(match encoding.as_str() {
384        "base64" => base64::engine::general_purpose::STANDARD.decode(s)?,
385        "base16" => hex::decode(s)?,
386        "base85" => match ascii85::decode(s) {
387            Ok(v) => v,
388            Err(e) => return Err(anyhow!("Base 85 decode error: {:?}", e)),
389        },
390        _ => {
391            return Err(anyhow!(
392                "Unknown encoding: <binary encoding=\"{}\">",
393                encoding
394            ))
395        }
396    })
397}
398
399/// Parse ISO 9660 date, simple form.
400fn parse_date(s: &str) -> Result<i64, Error> {
401    Ok(chrono::DateTime::parse_from_rfc3339(s)?.timestamp())
402}
403
/// Convert the text of an `<integer>` element to an `i32`.
///
/// Surrounding whitespace is ignored, and text that is empty after trimming
/// is accepted as 0. Anything else must parse as a 32-bit signed integer,
/// otherwise the parse error is returned.
fn parse_integer(s: &str) -> Result<i32, Error> {
    match s.trim() {
        "" => Ok(0), // empty element means zero
        digits => Ok(digits.parse::<i32>()?),
    }
}
413
/// Convert the text of a `<boolean>` element to a `bool`.
///
/// Accepted spellings are `0`, `0.0` and `false` for false, and `1`, `1.0`
/// and `true` for true. Text that is empty after trimming, as produced by an
/// empty `<boolean/>` element, is the default value `false`, in the same way
/// that `parse_integer` maps the empty string to 0.
///
/// # Errors
///
/// Returns the `bool` parse error for any other text.
fn parse_boolean(s: &str) -> Result<bool, Error> {
    match s.trim() {
        "" | "0" | "0.0" => Ok(false),
        "1" | "1.0" => Ok(true),
        word => Ok(word.parse::<bool>()?), // only "true" / "false" get through here
    }
}
422
423/// Search for attribute in attribute list
424fn get_attr(attrs: &Attributes, key: &[u8]) -> Result<Option<String>, Error> {
425    //  Each step has a possible error, so it's hard to do this more cleanly.
426    for attr in attrs.clone() {
427        let a = attr?;
428        if a.key != key {
429            continue;
430        } // not this one
431        let v = a.unescaped_value()?;
432        let sv = std::str::from_utf8(&v)?;
433        return Ok(Some(sv.to_string()));
434    }
435    Ok(None)
436}
437
438// Unit tests
439
/// Exercises the XML parser on three canned documents: a nested map that is
/// round-tripped through the serializer, an array with an empty `<integer />`,
/// and an array containing NaN.
#[test]
fn xmlparsetest1() {
    /// Parse `teststr`, serialize the result back to XML, parse that again and
    /// require both parses to be equal.
    /// The input must not contain NaN: NaN never compares equal to itself, so
    /// the final comparison would fail.
    fn trytestcase(teststr: &str) {
        let first_parse = from_str(teststr).unwrap();
        println!("Parse of {}: \n{:#?}", teststr, first_parse);
        let regenerated = crate::ser::xml::to_string(&first_parse, true).unwrap();
        let second_parse = from_str(&regenerated).unwrap();
        assert_eq!(first_parse, second_parse);
    }

    //  Nested map with most primitive types, a comment, and an inner array.
    const TESTXML1: &str = r#"
<?xml version="1.0" encoding="UTF-8"?>
<llsd>
<map>
  <key>region_id</key>
    <uuid>67153d5b-3659-afb4-8510-adda2c034649</uuid>
  <key>scale</key>
    <string>one minute</string>
  <key>simulator statistics</key>
  <map>
    <key>time dilation</key><real>0.9878624</real>
    <key>sim fps</key><real>44.38898</real>
    <key>pysics fps</key><real>44.38906</real>
    <key>lsl instructions per second</key><real>0</real>
    <key>total task count</key><real>4</real>
    <key>active task count</key><real>0</real>
    <key>active script count</key><real>4</real>
    <key>main agent count</key><real>0</real>
    <key>child agent count</key><real>0</real>
    <key>inbound packets per second</key><real>1.228283</real>
    <key>outbound packets per second</key><real>1.277508</real>
    <key>pending downloads</key><real>0</real>
    <key>pending uploads</key><real>0.0001096525</real>
    <key>frame ms</key><real>0.7757886</real>
    <key>net ms</key><real>0.3152919</real>
    <key>sim other ms</key><real>0.1826937</real>
    <key>sim physics ms</key><real>0.04323055</real>
    <key>agent ms</key><real>0.01599029</real>
    <key>image ms</key><real>0.01865955</real>
    <key>script ms</key><real>0.1338836</real>
    <!-- Comment - some additional test values -->
    <key>hex number</key><binary encoding="base16">0fa1</binary>
    <key>base64 number</key><binary>SGVsbG8gd29ybGQ=</binary>
    <key>date</key><date>2006-02-01T14:29:53Z</date>
    <key>array</key>
        <array>
            <boolean>false</boolean>
            <integer>42</integer>
            <undef/>
            <uuid/>
            <boolean>1</boolean>
        </array>
  </map>
</map>
</llsd>
"#;

    //  Array whose last element is an empty <integer />.
    const TESTXMLZERO: &str = r#"
<?xml version="1.0" encoding="UTF-8"?>
<llsd>
<array>
<integer>0</integer>
<integer>100</integer>
<integer />
</array>
</llsd>
"#;

    //  Values expected from TESTXMLZERO, in order.
    const TESTXMLZEROARRAY: [i32; 3] = [0, 100, 0];

    //  Array containing a NaN real.
    const TESTXMLNAN: &str = r#"
<?xml version="1.0" encoding="UTF-8"?>
<llsd>
<array>
<real>nan</real>
<real>0</real>
<undef />
</array>
</llsd>
"#;

    //  Round trip of the general test case.
    trytestcase(TESTXML1);

    //  Zero case: an empty <integer /> must come back as 0, per spec.
    {
        let zero_tree = from_str(TESTXMLZERO).unwrap();
        println!("Parse of {}: \n{:#?}", TESTXMLZERO, zero_tree);
        let values = zero_tree.as_array().unwrap(); // array of LLSD values
        assert_eq!(values.len(), TESTXMLZEROARRAY.len()); // lengths must match
        for (expected, actual) in TESTXMLZEROARRAY.iter().zip(values.iter()) {
            assert_eq!(*expected, *(actual.as_integer().unwrap())); // must match
        }
    }

    //  NaN case: compare regenerated XML with the input, ignoring blanks and
    //  newlines, because the parsed trees cannot be compared for equality.
    {
        let nan_tree = from_str(TESTXMLNAN).unwrap();
        println!("Parse of {}: \n{:#?}", TESTXMLNAN, nan_tree);
        let regenerated = crate::ser::xml::to_string(&nan_tree, true).unwrap();
        let squeeze = |text: &str| text.replace(" ", "").replace("\n", "");
        let s1 = squeeze(TESTXMLNAN);
        let s2 = squeeze(&regenerated);
        assert_eq!(s1, s2);
    }
}