Skip to main content

stamtools/
xml.rs

1use std::borrow::Cow;
2use std::collections::{BTreeMap, HashMap, BTreeSet};
3use std::fmt::Display;
4use std::fs::read_to_string;
5use std::path::Path;
6use std::hash::{Hash,DefaultHasher,Hasher};
7use std::process::{Command,  Stdio};
8use std::io::{ BufWriter, Write};
9
10use roxmltree::{Document, Node, NodeId, ParsingOptions};
11use serde::Deserialize;
12use stam::*;
13use toml;
14use upon::Engine;
15use std::fmt::Write as FmtWrite;
16use serde_json;
17
/// The reserved XML namespace URI (`http://www.w3.org/XML/1998/namespace`).
const NS_XML: &str = "http://www.w3.org/XML/1998/namespace";
/// URL of the `anno.jsonld` JSON-LD context (W3C Web Annotation).
const CONTEXT_ANNO: &str = "http://www.w3.org/ns/anno.jsonld";
20
21
/// Serde default provider for the `default_set` field of `XmlConversionConfig`:
/// yields the value used when the configuration does not specify one.
fn default_set() -> String {
    String::from("urn:stam-fromxml")
}
25
#[derive(Deserialize)]
/// Holds the configuration for mapping a specific XML format to STAM.
///
/// Instances are either deserialized (see `from_toml_str`) or built programmatically
/// via `new()` and the `with_*` builder methods.
pub struct XmlConversionConfig {
    #[serde(default)]
    /// Holds configurations for mapping specific XML elements to STAM. They are evaluated in
    /// reverse order (the last matching rule wins), so put more generic rules before specific ones.
    elements: Vec<XmlElementConfig>,

    #[serde(default)]
    /// Base elements are named templates, other elements can derive from these
    /// (by name, through their `base` list; resolved by `resolve_baseelements`).
    baseelements: HashMap<String, XmlElementConfig>,

    #[serde(default)]
    /// Maps XML prefixes to namespaces
    namespaces: HashMap<String, String>,

    #[serde(default = "XmlWhitespaceHandling::collapse")]
    /// Default whitespace handling (collapse when not configured)
    whitespace: XmlWhitespaceHandling,

    #[serde(default)]
    /// Sets additional context variables that can be used in templates
    context: HashMap<String, toml::Value>,

    #[serde(default)]
    /// Metadata configurations (annotation handling, annotation data and an optional ID template each).
    /// NOTE(review): presumably these apply at the document/resource level rather than per element — confirm against `add_metadata`.
    metadata: Vec<MetadataConfig>,

    #[serde(default)]
    /// Inject a DTD (for XML entity resolution)
    inject_dtd: Option<String>,

    #[serde(default = "default_set")]
    /// The default set; `urn:stam-fromxml` when not configured.
    /// NOTE(review): presumably the annotation data set used when data specifies no set — confirm against usage.
    default_set: String,

    #[serde(default)]
    /// A prefix to assign when setting annotation IDs
    id_prefix: Option<String>,

    #[serde(default)]
    /// Suffixes to strip when setting annotation IDs
    id_strip_suffix: Vec<String>,

    #[serde(default)]
    /// Add provenance information pointing each annotation to the appropriate node in the XML source files where it came from (translates into XPathSelector in Web Annotation output)
    provenance: bool,

    #[serde(default)]
    /// External filters. NOTE(review): `ExternalFilter` is defined outside this section; semantics not documented here.
    external_filters: Vec<ExternalFilter>,

    #[serde(skip_deserializing)]
    /// Enables debug output on standard error. Never read from the serialized configuration; set it via `with_debug`.
    debug: bool,

}
79
80impl XmlConversionConfig {
81    pub fn new() -> Self {
82        Self {
83            elements: Vec::new(),
84            baseelements: HashMap::new(),
85            namespaces: HashMap::new(),
86            context: HashMap::new(),
87            metadata: Vec::new(),
88            whitespace: XmlWhitespaceHandling::Collapse,
89            default_set: default_set(),
90            inject_dtd: None,
91            id_prefix: None,
92            id_strip_suffix: Vec::new(),
93            provenance: false,
94            external_filters: Vec::new(),
95            debug: false,
96        }
97    }
98
99    pub fn resolve_baseelements(&mut self) -> Result<(), XmlConversionError> {
100        let mut replace: Vec<(usize, XmlElementConfig)> = Vec::new();
101        for (i, element) in self.elements.iter().enumerate() {
102            let mut newelement = None;
103            for basename in element.base.iter().rev() {
104                if let Some(baseelement) = self.baseelements.get(basename) {
105                    if newelement.is_none() {
106                        newelement = Some(element.clone());
107                    }
108                    newelement
109                        .as_mut()
110                        .map(|newelement| newelement.update(baseelement));
111                } else {
112                    return Err(XmlConversionError::ConfigError(format!(
113                        "No such base element: {}",
114                        basename
115                    )));
116                }
117            }
118            if let Some(newelement) = newelement {
119                replace.push((i, newelement));
120            }
121        }
122        for (i, element) in replace {
123            self.elements[i] = element;
124        }
125        Ok(())
126    }
127
128    /// Parse the configuration from a TOML string (load the data from file yourself).
129    pub fn from_toml_str(tomlstr: &str) -> Result<Self, String> {
130        let mut config: Self = toml::from_str(tomlstr).map_err(|e| format!("{}", e))?;
131        config.resolve_baseelements().map_err(|e| format!("{}", e))?;
132        Ok(config)
133    }
134
135    pub fn with_debug(mut self, value: bool) -> Self {
136        self.debug = value;
137        self
138    }
139
140    /// Add provenance information pointing each annotation to the appropriate node in the XML source files where it came from (translates into XPathSelector in Web Annotation output)
141    pub fn with_provenance(mut self, value: bool) -> Self {
142        self.provenance = value;
143        self
144    }
145
146    /// Register an XML namespace with prefix
147    pub fn with_prefix(mut self, prefix: impl Into<String>, namespace: impl Into<String>) -> Self {
148        self.namespaces.insert(prefix.into(), namespace.into());
149        self
150    }
151
152    /// A prefix to assign when setting annotation IDs, within this string you can use the special variable `{resource}` to use the resource ID.
153    pub fn with_id_prefix(mut self, prefix: impl Into<String>) -> Self {
154        self.id_prefix = Some(prefix.into());
155        self
156    }
157
158    /// A suffix to strip when assigning annotation IDs
159    pub fn with_id_strip_suffix(mut self, suffix: impl Into<String>) -> Self {
160        self.id_strip_suffix.push(suffix.into());
161        self
162    }
163
164    /// Inject a DTD (for XML entity resolution)
165    pub fn with_inject_dtd(mut self, dtd: impl Into<String>) -> Self {
166        self.inject_dtd = Some(dtd.into());
167        self
168    }
169
170    /// Set default whitespace handling
171    pub fn with_whitespace(mut self, handling: XmlWhitespaceHandling) -> Self {
172        self.whitespace = handling;
173        self
174    }
175
176    /// Set an element configuration
177    pub fn with_element<F>(mut self, expression: &str, setup: F) -> Self
178    where
179        F: Fn(XmlElementConfig) -> XmlElementConfig,
180    {
181        let expression = XPathExpression::new(expression);
182        let element = setup(XmlElementConfig::new(expression));
183        if self.debug {
184            eprintln!("[STAM fromxml] registered {:?}", element);
185        }
186        self.elements.push(element);
187        self
188    }
189
190    /// How to handle this element?
191    fn element_config(&self, node: Node, path: &NodePath) -> Option<&XmlElementConfig> {
192        for elementconfig in self.elements.iter().rev() {
193            if elementconfig.path.test(path, node, self) {
194                return Some(elementconfig);
195            }
196        }
197        None
198    }
199
200    pub fn add_context(&mut self, key: impl Into<String>, value: toml::Value) {
201        self.context.insert(key.into(), value);
202    }
203
204    pub fn debug(&self) -> bool {
205        self.debug
206    }
207}
208
#[derive(Clone, Copy, Debug, PartialEq, Deserialize)]
/// Determines how to handle whitespace for an XML element
pub enum XmlWhitespaceHandling {
    /// Not specified (used for base templates); this is the `Default` value
    Unspecified,
    /// Inherit from parent
    Inherit,
    /// Whitespace is kept as is in the XML
    Preserve,
    /// all whitespace becomes space, consecutive whitespace is squashed
    Collapse,
}
221
222impl Default for XmlWhitespaceHandling {
223    fn default() -> Self {
224        XmlWhitespaceHandling::Unspecified
225    }
226}
227
228impl XmlWhitespaceHandling {
229    fn collapse() -> Self {
230        XmlWhitespaceHandling::Collapse
231    }
232}
233
#[derive(Debug, Clone, Deserialize, PartialEq, Copy, Default)]
/// Determines whether an annotation is made and, if so, what it selects
pub enum XmlAnnotationHandling {
    /// Not specified (the default); a value inherited from a base element may take its place
    #[default]
    Unspecified,

    /// No annotation
    None,

    /// Selects the text pertaining to the current element
    TextSelector,

    /// Selects the text pertaining to the current resource
    ResourceSelector,

    /// Selects the text between the current element and the next instance of the same element type
    TextSelectorBetweenMarkers,
}
252
#[derive(Debug, Clone, Deserialize)]
/// XML Element configuration, determines how to map an XML element (identified by an XPath expression) to STAM
pub struct XmlElementConfig {
    /// This is an XPath-like expression (just a small subset of XPath) to identify an element by its path.
    /// When omitted it defaults to `*`.

    #[serde(default)]
    path: XPathExpression,

    /// Annotation handling for matching elements (see `XmlAnnotationHandling`)
    #[serde(default)]
    annotation: XmlAnnotationHandling,

    /// Annotation data configurations for matching elements. When deriving from base elements,
    /// base entries that are not already present are appended (see `update`).
    #[serde(default)]
    annotationdata: Vec<XmlAnnotationDataConfig>,

    /// Template or None for no text handling, prefixes are never targeted by annotations
    #[serde(default)]
    textprefix: Option<String>,

    /// Extract text. None means unspecified and defaults to false.
    #[serde(default)]
    text: Option<bool>,

    /// Template or None for no text handling, suffixes are never targeted by annotations
    #[serde(default)]
    textsuffix: Option<String>,

    /// Annotation data for the text prefix
    #[serde(default)]
    annotatetextprefix: Vec<XmlAnnotationDataConfig>,

    /// Annotation data for the text suffix
    #[serde(default)]
    annotatetextsuffix: Vec<XmlAnnotationDataConfig>,

    /// Include the text prefix in the annotation's text selector. None means unspecified and defaults to false
    #[serde(default)]
    include_textprefix: Option<bool>,

    /// Include the text suffix in the annotation's text selector. None means unspecified and defaults to false
    #[serde(default)]
    include_textsuffix: Option<bool>,

    /// Base elements to derive from (names of entries in the configuration's `baseelements`)
    #[serde(default)]
    base: Vec<String>,

    /// Template or None for no ID extraction
    #[serde(default)]
    id: Option<String>,

    #[serde(default)]
    /// If true, do not descend into the children of this element; if false, descend into them.
    /// None means unspecified and defaults to false
    stop: Option<bool>,

    #[serde(default)]
    /// Whitespace handling for this element
    whitespace: XmlWhitespaceHandling,

    #[serde(default)]
    /// Assigns a scope id to this text range, it can later be referenced to constrain marker based annotation via `marker_scope`
    scope_id: Option<String>,

    #[serde(default)]
    /// If annotation handling is TextSelectorBetweenMarkers, this sets a scope so the last marker won't transcend (otherwise you get all text of the document)
    /// The scope refers to the `scope_id` of another element that was used in text extraction.
    marker_scope: Option<String>,

}
321
322impl XmlElementConfig {
323    fn new(expression: XPathExpression) -> Self {
324        Self {
325            path: expression,
326            stop: None,
327            whitespace: XmlWhitespaceHandling::Unspecified,
328            annotation: XmlAnnotationHandling::Unspecified,
329            annotationdata: Vec::new(),
330            base: Vec::new(),
331            id: None,
332            textprefix: None,
333            text: None,
334            textsuffix: None,
335            annotatetextprefix: Vec::new(),
336            annotatetextsuffix: Vec::new(),
337            include_textprefix: None,
338            include_textsuffix: None,
339            scope_id: None,
340            marker_scope: None,
341        }
342    }
343
344    pub fn update(&mut self, base: &XmlElementConfig) {
345        if self.whitespace == XmlWhitespaceHandling::Unspecified
346            && base.whitespace != XmlWhitespaceHandling::Unspecified
347        {
348            self.whitespace = base.whitespace;
349        }
350        if self.annotation == XmlAnnotationHandling::Unspecified
351            && base.annotation != XmlAnnotationHandling::Unspecified
352        {
353            self.annotation = base.annotation;
354        }
355        if self.textprefix.is_none() && base.textprefix.is_some() {
356            self.textprefix = base.textprefix.clone();
357        }
358        if self.text.is_none() && base.text.is_some() {
359            self.text = base.text;
360        }
361        if self.textsuffix.is_none() && base.textsuffix.is_some() {
362            self.textsuffix = base.textsuffix.clone();
363        }
364        if self.id.is_none() && base.id.is_some() {
365            self.id = base.id.clone();
366        }
367        if self.stop.is_none() && base.stop.is_some() {
368            self.stop = base.stop;
369        }
370        for annotationdata in base.annotationdata.iter() {
371            if !self.annotationdata.contains(annotationdata) {
372                self.annotationdata.push(annotationdata.clone());
373            }
374        }
375        if self.annotatetextsuffix.is_empty() && !base.annotatetextsuffix.is_empty() {
376            self.annotatetextsuffix = base.annotatetextsuffix.clone();
377        }
378        if self.annotatetextprefix.is_empty() && !base.annotatetextprefix.is_empty() {
379            self.annotatetextprefix = base.annotatetextprefix.clone();
380        }
381        if self.include_textsuffix.is_none() {
382            self.include_textsuffix = base.include_textsuffix;
383        }
384        if self.include_textprefix.is_none() {
385            self.include_textprefix = base.include_textprefix;
386        }
387    }
388
389
390    /// This sets the mode that determines how the element is handledhttps://www.youtube.com/watch?v=G_BrbhRrP6g
391    pub fn with_stop(mut self, stop: bool) -> Self {
392        self.stop = Some(stop);
393        self
394    }
395
396    /// This sets the whitespace handling for this element
397    pub fn with_whitespace(mut self, handling: XmlWhitespaceHandling) -> Self {
398        self.whitespace = handling;
399        self
400    }
401
402    pub fn with_text(mut self, text: bool) -> Self {
403        self.text = Some(text);
404        self
405    }
406
407    pub fn with_base(mut self, iter: impl Iterator<Item = impl Into<String>>) -> Self {
408        self.base = iter.into_iter().map(|s| s.into()).collect();
409        self
410    }
411
412    pub fn without_text(mut self) -> Self {
413        self.text = None;
414        self
415    }
416
417    pub fn with_annotation(mut self, annotation: XmlAnnotationHandling) -> Self {
418        self.annotation = annotation;
419        self
420    }
421
422    /// Not a very safe hash function (just uses an address uniquely associated with this object) but works for our ends
423    fn hash(&self) -> usize {
424        self.path.0.as_ptr() as usize
425    }
426}
427
428impl PartialEq for XmlElementConfig {
429    fn eq(&self, other: &Self) -> bool {
430        self.hash() == other.hash()
431    }
432}
433
#[derive(Debug, Clone, Deserialize, PartialEq)]
/// Configuration for a piece of annotation data (id, set, key and value), where the
/// string-valued parts are templates.
pub struct XmlAnnotationDataConfig {
    /// Template
    id: Option<String>,
    /// Template
    set: Option<String>,
    /// Template
    key: Option<String>,
    /// Any string values are interpreted as templates
    value: Option<toml::Value>,

    /// The type of the value, will be automatically detected if not set.
    #[serde(default)]
    valuetype: Option<String>,

    /// Allow value templates that yield an empty string? (defaults to false)
    #[serde(default)]
    allow_empty_value: bool,

    /// Skip this data entirely if any underlying variables in the templates are undefined (defaults to false)
    #[serde(default)]
    skip_if_missing: bool,


    /// If the value is a list, convert it to multiple annotationdata instances with the same key, one for each of the values (defaults to false)
    #[serde(default)]
    multiple: bool,
}
462
463impl XmlAnnotationDataConfig {
464    pub fn with_id(mut self, id: impl Into<String>) -> Self {
465        self.id = Some(id.into());
466        self
467    }
468
469    pub fn with_set(mut self, set: impl Into<String>) -> Self {
470        self.set = Some(set.into());
471        self
472    }
473
474    pub fn with_key(mut self, key: impl Into<String>) -> Self {
475        self.key = Some(key.into());
476        self
477    }
478
479    pub fn with_value(mut self, value: impl Into<toml::Value>) -> Self {
480        self.value = Some(value.into());
481        self
482    }
483}
484
/// Not really full XPath, just a very minor subset: `/`-separated segments, each an
/// element name (or `*`) with an optional namespace prefix (`prefix:name`) and an
/// optional condition between square brackets (`name[condition]`).
#[derive(Debug, Clone, PartialEq, Deserialize)]
struct XPathExpression(String);
488
489impl XPathExpression {
490    pub fn new(expression: impl Into<String>) -> Self {
491        Self(expression.into())
492    }
493
494    pub fn any() -> Self {
495        Self("*".into())
496    }
497
498    pub fn iter<'a>(
499        &'a self,
500        config: &'a XmlConversionConfig,
501    ) -> impl Iterator<Item = (Option<&'a str>, &'a str, Option<&'a str>)> {
502        self.0.trim_start_matches('/').split("/").map(|segment| {
503            //eprintln!("DEBUG: segment={}", segment);
504            let (prefix, name, condition) = Self::parse_segment(segment);
505            let namespace = if let Some(prefix) = prefix {
506                if let Some(namespace) = config.namespaces.get(prefix).map(|x| x.as_str()) {
507                    Some(namespace)
508                } else {
509                    panic!(
510                        "XML namespace prefix not known in configuration: {}",
511                        prefix
512                    );
513                }
514            } else {
515                None
516            };
517            (namespace, name, condition)
518        })
519    }
520
521    /// matches a node path against an XPath-like expression
522    fn test<'a, 'b>(&self, path: &NodePath<'a, 'b>, mut node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
523        let mut pathiter = path.components.iter().rev();
524        for (refns, refname, condition) in self.iter(config).collect::<Vec<_>>().into_iter().rev() {
525            if let Some(component) = pathiter.next() {
526                /*if config.debug() {
527                    eprintln!("[STAM fromxml]          testing component {:?} against refns={:?} refname={} condition={:?}", component, refns, refname, condition);
528                }*/
529                if refname != "*" && refname != "" {
530                    if refns.is_none() != component.namespace.is_none() || component.namespace != refns || refname != component.tagname {
531                        return false;
532                    }
533                }
534                if let Some(condition) = condition {
535                    if !self.test_condition(condition, node, config) {
536                        return false;
537                    }
538                }
539                if let Some(parent) = node.parent() { 
540                    node = parent;
541                }
542            } else {
543                if refname != "" {
544                    return false;
545                }
546            }
547        }
548        /* if config.debug() {
549            eprintln!("[STAM fromxml]          match");
550        }*/
551        true
552    }
553
554    fn test_condition<'a,'b>(&self, condition: &'a str, node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
555        for condition in condition.split(" and ") { //MAYBE TODO: doesn't take literals into account yet!
556            if let Some(pos) = condition.find("!=") {
557                let var = &condition[..pos];
558                let right = condition[pos+2..].trim_matches('"');
559                if self.get_var(var, &node, config) == Some(right) {
560                    return false;
561                }
562            } else if let Some(pos) = condition.find("=") {
563                let var = &condition[..pos];
564                let right = condition[pos+1..].trim_matches('"');
565                let value = self.get_var(var, &node, config);
566                if value != Some(right) {
567                    return false;
568                }
569            } else {
570                //condition is one variable and merely needs to exist
571                let v = self.get_var(condition, &node, config);
572                if v.is_none() || v == Some("") {
573                    return false;
574                }
575            }
576        }
577        /*if config.debug() {
578            eprintln!("[STAM fromxml]          condition matches");
579        }*/
580        true
581    }
582
583    /// Resolve a variable from a conditional expression, given a variable name, node and config
584    fn get_var<'a,'b>(&self, var: &str, node: &Node<'a,'b>, config: &XmlConversionConfig) -> Option<&'a str> { 
585        if var.starts_with("@") {
586            if let Some(pos) = var.find(":") {
587                let prefix = &var[1..pos];
588                if let Some(ns) = config.namespaces.get(prefix) {
589                    let var = &var[pos+1..];
590                    node.attribute((ns.as_str(),var))
591                } else {
592                    None
593                }
594            } else {
595                node.attribute(&var[1..])
596            }
597        } else if var == "text()" {
598            node.text().map(|s|s.trim())
599        } else {
600            None
601        }
602    }
603
604    /// Parses a segment into a namespace-prefix, a name and a condition
605    fn parse_segment<'a>(s: &'a str) -> (Option<&'a str>, &'a str, Option<&'a str>) {
606        let (name, condition) = if let (Some(begin), Some(end)) = (s.find("["), s.rfind("]")) {
607            (&s[..begin], Some(&s[begin + 1..end]))
608        } else {
609            (s, None)
610        };
611        if let Some((prefix, name)) = name.split_once(":") {
612            (Some(prefix), name, condition)
613        } else {
614            (None, name, condition)
615        }
616    }
617}
618
619
620
621impl Default for XPathExpression {
622    fn default() -> Self {
623        Self::any()
624    }
625}
626
/// A single step in a `NodePath`: one element, identified by namespace and tag name
#[derive(Clone, Debug, PartialEq)]
struct NodePathComponent<'a,'b> {
    /// Namespace of the element, if any
    namespace: Option<&'a str>,
    /// Tag name of the element (without namespace)
    tagname: &'b str,
    /// Index sequence number, 1-indexed (as specified by XPath)
    index: Option<usize>,
}
634
/// The path to a node: a sequence of components; `XPathExpression::test` pairs the last
/// component with the node under test, so the sequence runs from outermost to innermost.
#[derive(Clone, Debug, PartialEq, Default)]
struct NodePath<'a, 'b> {
    components: Vec<NodePathComponent<'a,'b>>,
}
639
640impl<'a, 'b> Display for NodePath<'a, 'b> {
641    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
642        for component in self.components.iter() {
643            write!(f, "/")?;
644            if let Some(ns) = component.namespace {
645                if let Some(index) = component.index {
646                    write!(f, "{{{}}}{}[{}]", ns, component.tagname, index)?;
647                } else {
648                    write!(f, "{{{}}}{}", ns, component.tagname)?;
649                }
650            } else {
651                if let Some(index) = component.index {
652                    write!(f, "{}[{}]", component.tagname, index)?;
653                } else {
654                    write!(f, "{}", component.tagname)?;
655                }
656            }
657        }
658        Ok(())
659    }
660}
661
662impl<'a,'b> NodePath<'a,'b> {
663    fn add(&mut self, node: &Node<'a,'b>, index: Option<usize>) {
664        if node.tag_name().name() != "" {
665            self.components.push(
666                NodePathComponent {
667                    namespace: node.tag_name().namespace(),
668                    tagname: node.tag_name().name(),
669                    index,
670                }
671            )
672        }
673    }
674
675    fn format_as_xpath(&self, prefixes: &HashMap<String, String>) -> String {
676        let mut out = String::new();
677        for component in self.components.iter() {
678            out.push('/');
679            if let Some(ns) = component.namespace {
680                if let Some(prefix) = prefixes.get(ns) {
681                    if let Some(index) = component.index {
682                        out += &format!("{}:{}[{}]", prefix, component.tagname, index);
683                    } else {
684                        out += &format!("{}:{}", prefix, component.tagname);
685                    }
686                } else {
687                    eprintln!("STAM fromxml WARNING: format_as_xpath: namespace {} not defined, no prefix found!", ns);
688                    if let Some(index) = component.index {
689                        out += &format!("{}[{}]", component.tagname, index);
690                    } else {
691                        out += &format!("{}", component.tagname);
692                    }
693                }
694            } else {
695                if let Some(index) = component.index {
696                    out += &format!("{}[{}]", component.tagname, index);
697                } else {
698                    out += &format!("{}", component.tagname);
699                }
700            }
701        }
702        out
703    }
704}
705
706
/// Counts elder siblings, used to determine index values
#[derive(Default,Debug)]
struct SiblingCounter {
    /// Number of times each tag name was counted so far, keyed by the debug representation of the tag name
    map: HashMap<String,usize>,
}
712
713impl SiblingCounter {
714    fn count<'a,'b>(&mut self, node: &Node<'a,'b>) -> usize {
715        let s = format!("{:?}", node.tag_name());
716        *self.map.entry(s).and_modify(|c| {*c += 1;}).or_insert(1)
717    }
718}
719
720
#[derive(Debug, Clone, Deserialize)]
/// Metadata configuration: an annotation handling, annotation data and an optional ID template.
/// Unlike `XmlElementConfig`, it has no path expression.
/// NOTE(review): presumably describes document/resource-level metadata annotations — confirm against `add_metadata`.
pub struct MetadataConfig {
    /// Annotation handling (see `XmlAnnotationHandling`)
    #[serde(default)]
    annotation: XmlAnnotationHandling,

    /// Annotation data configurations
    #[serde(default)]
    annotationdata: Vec<XmlAnnotationDataConfig>,

    /// Template or None for no ID extraction
    #[serde(default)]
    id: Option<String>,
}
735
736/// Translate an XML file to STAM, given a particular configuration
737pub fn from_xml<'a>(
738    filename: &Path,
739    config: &XmlConversionConfig,
740    store: &'a mut AnnotationStore,
741) -> Result<(), String> {
742    if config.debug {
743        eprintln!("[STAM fromxml] parsing {}", filename.display());
744    }
745
746    // Read the raw XML data
747    let mut xmlstring = read_to_string(filename)
748        .map_err(|e| format!("Error opening XML file {}: {}", filename.display(), e))?;
749
750    //patchy: remove HTML5 doctype and inject our own
751    if xmlstring[..100].find("<!DOCTYPE html>").is_some() && config.inject_dtd.is_some() {
752        xmlstring = xmlstring.replacen("<!DOCTYPE html>", "", 1);
753    }
754
755    // we can only inject a DTD if there is no doctype
756    if xmlstring[..100].find("<!DOCTYPE").is_none() {
757        if let Some(dtd) = config.inject_dtd.as_ref() {
758            xmlstring = dtd.to_string() + &xmlstring
759        };
760    } else if config.inject_dtd.is_some() {
761        eprintln!("[STAM fromxml] WARNING: Can not inject DTD because file already has a DOCTYPE");
762    }
763
764    // parse the raw XML data into a DOM
765    let doc = Document::parse_with_options(
766        &xmlstring,
767        ParsingOptions {
768            allow_dtd: true,
769            ..ParsingOptions::default()
770        },
771    )
772    .map_err(|e| format!("Error parsing XML file {}: {}", filename.display(), e))?;
773
774    let mut converter = XmlToStamConverter::new(config);
775    converter
776        .compile()
777        .map_err(|e| format!("Error compiling templates: {}", e))?;
778
779    let textoutfilename = format!(
780        "{}.txt",
781        filename
782            .file_stem()
783            .expect("invalid filename")
784            .to_str()
785            .expect("invalid utf-8 in filename")
786    );
787
788    // extract text (first pass)
789    let mut path = NodePath::default();
790    path.add(&doc.root_element(), None);
791    converter
792        .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(textoutfilename.as_str()), Some(&filename.to_string_lossy()), 0)
793        .map_err(|e| {
794            format!(
795                "Error extracting element text from {}: {}",
796                filename.display(),
797                e
798            )
799        })?;
800    if config.debug {
801        eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
802    }
803    let resource = TextResourceBuilder::new()
804        .with_id(filename_to_id(textoutfilename.as_str(), config).to_string())
805        .with_text(converter.text.clone())
806        .with_filename(&textoutfilename);
807
808    converter.resource_handle = Some(
809        store
810            .add_resource(resource)
811            .map_err(|e| format!("Failed to add resource {}: {}", &textoutfilename, e))?,
812    );
813
814    converter.add_metadata(store).map_err(|e| format!("Failed to add metadata {}: {}", &textoutfilename, e))?;
815
816    // extract annotations (second pass)
817    converter
818        .extract_element_annotation(doc.root_element(), &path,  Some(&filename.to_string_lossy()),0,  store)
819        .map_err(|e| {
820            format!(
821                "Error extracting element annotation from {}: {}",
822                filename.display(),
823                e
824            )
825        })?;
826
827    Ok(())
828}
829
830/// Translate an XML file to STAM, given a particular configuration. This translates multiple XML files to a single output file.
831pub fn from_multi_xml<'a>(
832    filenames: &Vec<&Path>,
833    outputfile: Option<&Path>,
834    config: &XmlConversionConfig,
835    store: &'a mut AnnotationStore,
836) -> Result<(), String> {
837
838    let textoutfilename = if let Some(outputfile) = outputfile {
839        format!("{}",outputfile.to_str().expect("invalid utf-8 in filename"))
840    } else {
841        format!(
842            "{}.txt",
843                filenames.iter().next().expect("1 or more filename need to be provided")
844                .file_stem()
845                .expect("invalid filename")
846                .to_str()
847                .expect("invalid utf-8 in filename")
848        )
849    };
850
851    // Read the raw XML data
852    let mut xmlstrings: Vec<String> = Vec::new();
853    let mut docs: Vec<Document> = Vec::new();
854    for filename in filenames.iter() {
855        if config.debug {
856            eprintln!("[STAM fromxml] parsing {} (one of multiple)", filename.display());
857        }
858        //patchy: remove HTML5 doctype and inject our own
859        let mut xmlstring = read_to_string(filename).map_err(|e| format!("Error opening XML file {}: {}", filename.display(), e))?;
860        if xmlstring[..100].find("<!DOCTYPE html>").is_some() && config.inject_dtd.is_some() {
861            xmlstring = xmlstring.replacen("<!DOCTYPE html>", "", 1);
862        }
863        // we can only inject a DTD if there is no doctype
864        if xmlstring[..100].find("<!DOCTYPE").is_none() {
865            if let Some(dtd) = config.inject_dtd.as_ref() {
866                xmlstring = dtd.to_string() + &xmlstring
867            };
868        } else if config.inject_dtd.is_some() {
869            eprintln!("[STAM fromxml] WARNING: Can not inject DTD because file already has a DOCTYPE");
870        }
871        xmlstrings.push(xmlstring);
872    }
873
874    for (filename, xmlstring) in filenames.iter().zip(xmlstrings.iter()) {
875        // parse the raw XML data into a DOM
876        let doc = Document::parse_with_options(
877            xmlstring,
878            ParsingOptions {
879                allow_dtd: true,
880                ..ParsingOptions::default()
881            },
882        )
883        .map_err(|e| format!("Error parsing XML file {}: {}", filename.display(), e))?;
884        docs.push(doc);
885    }
886
887    let mut converter = XmlToStamConverter::new(config);
888    converter
889        .compile()
890        .map_err(|e| format!("Error compiling templates: {}", e))?;
891
892    for (i, (doc, filename)) in docs.iter().zip(filenames.iter()).enumerate() {
893        let mut path = NodePath::default();
894        path.add(&doc.root_element(), None);
895        // extract text (first pass)
896        converter
897            .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(textoutfilename.as_str()), Some(&filename.to_string_lossy()), i)
898            .map_err(|e| {
899                format!(
900                    "Error extracting element text from {}: {}",
901                    filename.display(),
902                    e
903                )
904            })?;
905        if config.debug {
906            eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
907        }
908    }
909
910    let resource = TextResourceBuilder::new()
911        .with_id(filename_to_id(textoutfilename.as_str(), config).to_string())
912        .with_text(converter.text.clone())
913        .with_filename(&textoutfilename);
914
915    converter.resource_handle = Some(
916        store
917            .add_resource(resource)
918            .map_err(|e| format!("Failed to add resource {}: {}", &textoutfilename, e))?,
919    );
920
921    converter.add_metadata(store).map_err(|e| format!("Failed to add metadata {}: {}", &textoutfilename, e))?;
922
923    // extract annotations (second pass)
924    for (i,(doc, filename)) in docs.iter().zip(filenames.iter()).enumerate() {
925        let mut path = NodePath::default();
926        path.add(&doc.root_element(), None);
927        converter
928            .extract_element_annotation(doc.root_element(), &path, Some(&filename.to_string_lossy()),i,  store)
929            .map_err(|e| {
930                format!(
931                    "Error extracting element annotation from {}: {}",
932                    filename.display(),
933                    e
934                )
935            })?;
936    }
937
938    Ok(())
939}
940
941/// Translate an XML file to STAM, given a particular configuration. Not writing output files and keeping all in memory. Does not support DTD injection.
942pub fn from_xml_in_memory<'a>(
943    resource_id: &str, 
944    xmlstring: &str,
945    config: &XmlConversionConfig,
946    store: &'a mut AnnotationStore,
947) -> Result<(), String> {
948    if config.debug {
949        eprintln!("[STAM fromxml] parsing XML string");
950    }
951
952    // parse the raw XML data into a DOM
953    let doc = Document::parse_with_options(
954        &xmlstring,
955        ParsingOptions {
956            allow_dtd: true,
957            ..ParsingOptions::default()
958        },
959    )
960    .map_err(|e| format!("Error parsing XML string: {}",  e))?;
961
962    let mut converter = XmlToStamConverter::new(config);
963    converter
964        .compile()
965        .map_err(|e| format!("Error compiling templates: {}", e))?;
966
967    let mut path = NodePath::default();
968    path.add(&doc.root_element(), None);
969    // extract text (first pass)
970    converter
971        .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(resource_id), Some(resource_id), 0)
972        .map_err(|e| {
973            format!(
974                "Error extracting element text from {}: {}",
975                resource_id,
976                e
977            )
978        })?;
979    if config.debug {
980        eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
981    }
982    let resource = TextResourceBuilder::new()
983        .with_id(resource_id)
984        .with_text(converter.text.clone());
985
986    converter.resource_handle = Some(
987        store
988            .add_resource(resource)
989            .map_err(|e| format!("Failed to add resource {}: {}", &resource_id, e))?,
990    );
991
992    converter.add_metadata(store).map_err(|e| format!("Failed to add metadata for {}: {}", &resource_id, e))?;
993
994    // extract annotations (second pass)
995    converter
996        .extract_element_annotation(doc.root_element(), &path, Some(resource_id), 0, store)
997        .map_err(|e| {
998            format!(
999                "Error extracting element annotation from {}: {}",
1000                resource_id,
1001                e
1002            )
1003        })?;
1004
1005    Ok(())
1006}
1007
1008pub fn filename_to_id<'a>(filename: &'a str, config: &XmlConversionConfig) -> &'a str {
1009    for suffix in config.id_strip_suffix.iter() {
1010        if filename.ends_with(suffix) {
1011            return &filename[..filename.len() - suffix.len()];
1012        }
1013    }
1014    return filename;
1015}
1016
/// Discriminates the kind of position recorded for a node; used as the last component of
/// the keys in the converter's position maps.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum PositionType {
    /// Presumably the span of the element's own text content
    Body,
    /// Presumably the span of the text prefix emitted before the element's text
    TextPrefix,
    /// Presumably the span of the text suffix emitted after the element's text
    TextSuffix,
}
1023
1024struct XmlToStamConverter<'a> {
1025    /// The current character position the conversion process is at
1026    cursor: usize,
1027
1028    /// The extracted plain-text after/during untangling
1029    text: String,
1030
1031    /// The template engine
1032    template_engine: Engine<'a>,
1033
1034    /// Keep track of the new positions (unicode offset) where the node starts in the untangled document. The key consist of a document sequence number and a node ID.
1035    positionmap: HashMap<(usize,NodeId,PositionType), Offset>,
1036
1037    /// Keep track of the new positions (bytes offset) where the node starts in the untangled document. The key consist of a document sequence number and a node ID.
1038    bytepositionmap: HashMap<(usize,NodeId,PositionType), (usize, usize)>,
1039
1040    /// Keep track of markers (XML elements with `XmlAnnotationHandling::TextSelectorBetweenMarkers`), the key in this map is some hash of XmlElementConfig.
1041    markers: HashMap<usize, Vec<(usize,NodeId)>>,
1042
1043    /// Keep track of scopes. These are used to find marker scopes. Only the last scope is registered. The key is an Xpath expression. The value is a sequence number and node ID which can subsequently be looked up in the position maps
1044    scopes: HashMap<String, (usize,NodeId)>,
1045
1046    /// The resource
1047    resource_handle: Option<TextResourceHandle>,
1048
1049    /// Used to keep track of whether we need to insert a whitespace before actual text
1050    pending_whitespace: bool,
1051
1052    /// The configuration
1053    config: &'a XmlConversionConfig,
1054
1055    /// Namespace to prefix map
1056    prefixes: HashMap<String, String>,
1057
1058    ///  Global context for template
1059    global_context: BTreeMap<String, upon::Value>,
1060
1061    /// Variable names per template
1062    variables: BTreeMap<String, BTreeSet<&'a str>>,
1063    
1064    debugindent: String,
1065}
1066
1067pub enum XmlConversionError {
1068    StamError(StamError),
1069    TemplateError(String, Option<upon::Error>),
1070    ConfigError(String),
1071}
1072
1073impl From<StamError> for XmlConversionError {
1074    fn from(error: StamError) -> Self {
1075        Self::StamError(error)
1076    }
1077}
1078
1079impl From<upon::Error> for XmlConversionError {
1080    fn from(error: upon::Error) -> Self {
1081        Self::TemplateError("".into(), Some(error))
1082    }
1083}
1084
1085impl Display for XmlConversionError {
1086    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1087        match self {
1088            Self::StamError(e) => e.fmt(f),
1089            Self::TemplateError(s, e) => {
1090                f.write_str(s.as_str())?;
1091                f.write_str(": ")?;
1092                if let Some(e) = e {
1093                    e.fmt(f)?;
1094                }
1095                f.write_str("")
1096            }
1097            Self::ConfigError(e) => e.fmt(f),
1098        }
1099    }
1100}
1101
1102impl<'a> XmlToStamConverter<'a> {
1103    fn new(config: &'a XmlConversionConfig) -> Self {
1104        let mut prefixes: HashMap<String, String> = HashMap::new();
1105        for (prefix, namespace) in config.namespaces.iter() {
1106            prefixes.insert(namespace.to_string(), prefix.to_string());
1107        }
1108        let mut template_engine = Engine::new();
1109        template_engine.set_default_formatter(&value_formatter); //this one serializes Lists like in JSON
1110        template_engine.add_function("capitalize", filter_capitalize);
1111        template_engine.add_function("lower", str::to_lowercase);
1112        template_engine.add_function("upper", str::to_uppercase);
1113        template_engine.add_function("trim", |s: &str| s.trim().to_string() );
1114        template_engine.add_function("add", filter_add);
1115        template_engine.add_function("sub", filter_sub);
1116        template_engine.add_function("mul", filter_mul);
1117        template_engine.add_function("div", filter_div);
1118        template_engine.add_function("eq", |a: &upon::Value, b: &upon::Value| a == b);
1119        template_engine.add_function("ne", |a: &upon::Value, b: &upon::Value| a != b);
1120        template_engine.add_function("gt", filter_gt);
1121        template_engine.add_function("lt", filter_lt);
1122        template_engine.add_function("gte", filter_gte);
1123        template_engine.add_function("lte", filter_lte);
1124        template_engine.add_function("int", |a: &upon::Value| match a {
1125            upon::Value::Integer(x) => upon::Value::Integer(*x),
1126            upon::Value::Float(x) => upon::Value::Integer(*x as i64), 
1127            upon::Value::String(s) => upon::Value::Integer(s.parse().expect("int filter expects an integer value")),
1128            _ => panic!("int filter expects an integer value"), //<< --^  TODO: PANIC IS WAY TO STRICT
1129        });
1130        template_engine.add_function("float", |a: &upon::Value| match a {
1131            upon::Value::Float(_) => a.clone(),
1132            upon::Value::Integer(x) => upon::Value::Float(*x as f64),
1133            upon::Value::String(s) => upon::Value::Float(s.parse().expect("float filter expects a float value")),
1134            _ => panic!("int filter expects an integer value"), //<< --^  TODO: PANIC IS WAY TO STRICT
1135        });
1136        template_engine.add_function("str", |a: upon::Value| match a {
1137            upon::Value::Integer(x) => upon::Value::String(format!("{}",x)),
1138            upon::Value::Float(x) => upon::Value::String(format!("{}",x)),
1139            upon::Value::Bool(x) => upon::Value::String(format!("{}",x)),
1140            upon::Value::String(_) => a,
1141            upon::Value::None => upon::Value::String(String::new()),
1142            upon::Value::List(list) => { //too much cloning but it'll do for now
1143                let newlist: Vec<String> = list.iter().map(|v| match v {
1144                    upon::Value::String(s) => s.clone(),
1145                    upon::Value::Integer(d) => format!("{}",d),
1146                    upon::Value::Float(d) => format!("{}",d),
1147                    upon::Value::Bool(d) => format!("{}",d),
1148                    _ => String::new(),
1149                }).collect();
1150                upon::Value::String(newlist.join(", "))
1151            },
1152            _ => panic!("map to string not implemented"), //<< --^  TODO: PANIC IS WAY TO STRICT
1153        });
1154        template_engine.add_function("as_range", |a: i64| upon::Value::List(std::ops::Range { start: 0, end: a }.into_iter().map(|x| upon::Value::Integer(x+1)).collect::<Vec<_>>()) );
1155        template_engine.add_function("last", |list: &[upon::Value]| list.last().map(Clone::clone));
1156        template_engine.add_function("first", |list: &[upon::Value]| {
1157            list.first().map(Clone::clone)
1158        });
1159        template_engine.add_function("tokenize", |s: &str| {
1160            upon::Value::List(
1161                s.split(|c| c == ' ' || c == '\n').filter_map(|x|
1162                    if !x.is_empty() { 
1163                        Some(upon::Value::String(x.to_string())) 
1164                    } else {
1165                        None
1166                    }
1167                )
1168                .collect::<Vec<upon::Value>>())
1169        });
1170        template_engine.add_function("replace", |s: &str, from: &str, to: &str| { 
1171            upon::Value::String(s.replace(from,to))
1172        });
1173        template_engine.add_function("starts_with", |s: &str, prefix: &str| { 
1174            s.starts_with(prefix)
1175        });
1176        template_engine.add_function("ends_with", |s: &str, suffix: &str| { 
1177            s.ends_with(suffix)
1178        });
1179        template_engine.add_function("basename", |a: &upon::Value| match a {
1180            upon::Value::String(s) => upon::Value::String(s.split(|c| c == '/' || c == '\\').last().expect("splitting must work").to_string()),
1181            _ => panic!("basename filter expects a string value"), //<< --^  TODO: PANIC IS WAY TO STRICT
1182        });
1183        template_engine.add_function("noext", |a: &upon::Value| match a {
1184            upon::Value::String(s) => if let Some(pos) = s.rfind('.') {
1185                s[..pos].to_string()
1186            } else {
1187                s.to_string()
1188            },
1189            _ => panic!("basename filter expects a string value"), //<< --^  TODO: PANIC IS WAY TO STRICT
1190        });
1191        template_engine.add_function("join", |list: &upon::Value, delimiter: &str| match list {
1192            upon::Value::List(list) => { //too much cloning but it'll do for now
1193                let newlist: Vec<String> = list.iter().map(|v| match v {
1194                    upon::Value::String(s) => s.clone(),
1195                    upon::Value::Integer(d) => format!("{}",d),
1196                    upon::Value::Float(d) => format!("{}",d),
1197                    upon::Value::Bool(d) => format!("{}",d),
1198                    _ => String::new(),
1199                }).collect();
1200                upon::Value::String(newlist.join(delimiter))
1201            },
1202            _ => {
1203                list.clone() //was not really a list after all, just pass it on so we don't need to panic
1204            }
1205        });
1206        let mut converter = Self {
1207            cursor: 0,
1208            text: String::new(),
1209            template_engine,
1210            positionmap: HashMap::new(),
1211            bytepositionmap: HashMap::new(),
1212            scopes:  HashMap::new(),
1213            markers: HashMap::new(),
1214            resource_handle: None,
1215            pending_whitespace: false,
1216            global_context: BTreeMap::new(),
1217            debugindent: String::new(),
1218            variables: BTreeMap::new(),
1219            prefixes,
1220            config,
1221        };
1222        converter.set_global_context();
1223        converter.add_external_filters();
1224        converter
1225    }
1226
1227    fn add_external_filters(&mut self) {
1228        for filter in self.config.external_filters.clone() {
1229            self.template_engine.add_function(filter.name.clone(), move |value: &upon::Value| filter.run(value)  );
1230        }
1231    }
1232
1233    /// Compile templates
1234    fn compile(&mut self) -> Result<(), XmlConversionError> {
1235        if self.config.debug {
1236            eprintln!("[STAM fromxml] compiling templates");
1237        }
1238        for element in self.config.elements.iter() {
1239            if let Some(textprefix) = element.textprefix.as_ref() {
1240                if self.template_engine.get_template(textprefix.as_str()).is_none() {
1241                    let template = self.precompile(textprefix.as_str());
1242                    self.template_engine
1243                        .add_template(textprefix.clone(), template)
1244                        .map_err(|e| {
1245                            XmlConversionError::TemplateError(
1246                                format!("element/textprefix template {}", textprefix.clone()),
1247                                Some(e),
1248                            )
1249                        })?;
1250                }
1251            }
1252            if let Some(textsuffix) = element.textsuffix.as_ref() {
1253                if self.template_engine.get_template(textsuffix.as_str()).is_none() {
1254                    let template = self.precompile(textsuffix.as_str());
1255                    self.template_engine
1256                        .add_template(textsuffix.clone(), template)
1257                        .map_err(|e| {
1258                            XmlConversionError::TemplateError(
1259                                format!("element/textsuffix template {}", textsuffix.clone()),
1260                                Some(e),
1261                            )
1262                        })?;
1263                }
1264            }
1265            if let Some(id) = element.id.as_ref() {
1266                if self.template_engine.get_template(id.as_str()).is_none() {
1267                    let template = self.precompile(id.as_str());
1268                    self.template_engine.add_template(id.clone(), template).map_err(|e| {
1269                        XmlConversionError::TemplateError(
1270                            format!("element/id template {}", id.clone()),
1271                            Some(e),
1272                        )
1273                    })?;
1274                }
1275            }
1276            for annotationdata in element.annotationdata.iter().chain(element.annotatetextprefix.iter()).chain(element.annotatetextsuffix.iter()) {
1277                if let Some(id) = annotationdata.id.as_ref() {
1278                    if self.template_engine.get_template(id.as_str()).is_none() {
1279                        let template = self.precompile(id.as_str());
1280                        self.template_engine.add_template(id.clone(), template).map_err(|e| {
1281                            XmlConversionError::TemplateError(
1282                                format!("annotationdata/id template {}", id.clone()),
1283                                Some(e),
1284                            )
1285                        })?;
1286                    }
1287                }
1288                if let Some(set) = annotationdata.set.as_ref() {
1289                    if self.template_engine.get_template(set.as_str()).is_none() {
1290                        let template = self.precompile(set.as_str());
1291                        //eprintln!("------- DEBUG: {} -> {}", set.as_str(), template);
1292                        self.template_engine.add_template(set.clone(), template).map_err(|e| {
1293                            XmlConversionError::TemplateError(
1294                                format!("annotationdata/set template {}", set.clone()),
1295                                Some(e),
1296                            )
1297                        })?;
1298                    }
1299                }
1300                if let Some(key) = annotationdata.key.as_ref() {
1301                    if self.template_engine.get_template(key.as_str()).is_none() {
1302                        let template = self.precompile(key.as_str());
1303                        self.template_engine.add_template(key.clone(), template).map_err(|e| {
1304                            XmlConversionError::TemplateError(
1305                                format!("annotationdata/key template {}", key.clone()),
1306                                Some(e),
1307                            )
1308                        })?;
1309                    }
1310                }
1311                if let Some(value) = annotationdata.value.as_ref() {
1312                    self.compile_value(value)?;
1313                }
1314            }
1315        }
1316        for metadata in self.config.metadata.iter() {
1317            if let Some(id) = metadata.id.as_ref() {
1318                if self.template_engine.get_template(id.as_str()).is_none() {
1319                    let template = self.precompile(id.as_str());
1320                    self.template_engine.add_template(id.clone(), template).map_err(|e| {
1321                        XmlConversionError::TemplateError(
1322                            format!("metadata/id template {}", id.clone()),
1323                            Some(e),
1324                        )
1325                    })?;
1326                }
1327            }
1328            for annotationdata in metadata.annotationdata.iter() {
1329                if let Some(id) = annotationdata.id.as_ref() {
1330                    if self.template_engine.get_template(id.as_str()).is_none() {
1331                        let template = self.precompile(id.as_str());
1332                        self.template_engine.add_template(id.clone(), template).map_err(|e| {
1333                            XmlConversionError::TemplateError(
1334                                format!("annotationdata/id template {}", id.clone()),
1335                                Some(e),
1336                            )
1337                        })?;
1338                    }
1339                }
1340                if let Some(set) = annotationdata.set.as_ref() {
1341                    if self.template_engine.get_template(set.as_str()).is_none() {
1342                        let template = self.precompile(set.as_str());
1343                        //eprintln!("------- DEBUG: {} -> {}", set.as_str(), template);
1344                        self.template_engine.add_template(set.clone(), template).map_err(|e| {
1345                            XmlConversionError::TemplateError(
1346                                format!("annotationdata/set template {}", set.clone()),
1347                                Some(e),
1348                            )
1349                        })?;
1350                    }
1351                }
1352                if let Some(key) = annotationdata.key.as_ref() {
1353                    if self.template_engine.get_template(key.as_str()).is_none() {
1354                        let template = self.precompile(key.as_str());
1355                        self.template_engine.add_template(key.clone(), template).map_err(|e| {
1356                            XmlConversionError::TemplateError(
1357                                format!("annotationdata/key template {}", key.clone()),
1358                                Some(e),
1359                            )
1360                        })?;
1361                    }
1362                }
1363                if let Some(value) = annotationdata.value.as_ref() {
1364                    self.compile_value(value)?;
1365                }
1366            }
1367        }
1368        Ok(())
1369    }
1370
1371    /// Compile templates from a value, all strings are considered templates
1372    fn compile_value(&mut self, value: &'a toml::Value) -> Result<(), XmlConversionError> {
1373        match value {
1374            toml::Value::String(value) => {
1375                if self.template_engine.get_template(value.as_str()).is_none() {
1376                    let template = self.precompile(value.as_str());
1377                    self.template_engine.add_template(value.clone(), template).map_err(|e| {
1378                        XmlConversionError::TemplateError(
1379                            format!("annotationdata/value template {}", value.clone()),
1380                            Some(e),
1381                        )
1382                    })?;
1383                }
1384            }
1385            toml::Value::Table(map) => {
1386                for (_key, value) in map.iter() {
1387                    self.compile_value(value)?;
1388                }
1389            },
1390            toml::Value::Array(list) => {
1391                for value in list.iter() {
1392                    self.compile_value(value)?;
1393                }
1394            }
1395            _ => {} //no templates in other types
1396        }
1397        Ok(())
1398    }
1399
1400    /// untangle text, extract the text (and only the text)
1401    /// from an XML document, according to the
1402    /// mapping configuration and creates a STAM TextResource for it.
1403    /// Records exact offsets per element/node for later use during annotation extraction.
1404    fn extract_element_text<'b>(
1405        &mut self,
1406        node: Node<'a,'b>,
1407        path: &NodePath<'a,'b>,
1408        whitespace: XmlWhitespaceHandling,
1409        resource_id: Option<&str>,
1410        inputfile: Option<&str>,
1411        doc_num: usize,
1412    ) -> Result<(), XmlConversionError> {
1413        if self.config.debug {
1414            eprintln!("[STAM fromxml]{} extracting text for element {}", self.debugindent, path);
1415        }
1416        let mut begin = self.cursor; //current character pos marks the begin
1417        let mut bytebegin = self.text.len(); //current byte pos marks the begin
1418        let mut end_discount = 0; //the discount may be needed later if textsuffixes are outputted (which we do not want as part of the annotation)
1419        let mut end_bytediscount = 0;
1420        let mut firsttext = true; //tracks whether we have already outputted some text, needed for whitespace handling
1421
1422        let mut elder_siblings = SiblingCounter::default();
1423
1424        // obtain the configuration that applies to this element
1425        if let Some(element_config) = self.config.element_config(node, path) {
1426            if self.config.debug {
1427                eprintln!("[STAM fromxml]{} matching config: {:?}", self.debugindent, element_config);
1428            }
1429
1430            if (element_config.stop == Some(false) || element_config.stop.is_none())
1431                && element_config.annotation != XmlAnnotationHandling::TextSelectorBetweenMarkers
1432            {
1433                //do text extraction for this element
1434
1435                let whitespace = if node.has_attribute((NS_XML, "space")) {
1436                    // if there is an explicit xml:space attributes, it overrides whatever whitespace handling we have set:
1437                    match node.attribute((NS_XML, "space")).unwrap() {
1438                        "preserve" => XmlWhitespaceHandling::Preserve,
1439                        "collapse" | "replace" => XmlWhitespaceHandling::Collapse,
1440                        _ => whitespace,
1441                    }
1442                } else if element_config.whitespace == XmlWhitespaceHandling::Inherit
1443                    || element_config.whitespace == XmlWhitespaceHandling::Unspecified
1444                {
1445                    whitespace //from parent, i.e. passed to this (recursive) function by caller
1446                } else {
1447                    element_config.whitespace //default from the config
1448                };
1449
1450                // process the text prefix, a text template to include prior to the actual text
1451                self.process_textprefix(element_config, node, resource_id, inputfile, doc_num, &mut begin, &mut bytebegin)?;
1452
1453                let textbegin = self.cursor;
1454                // process all child elements
1455                for child in node.children() {
1456                    if self.config.debug {
1457                        eprintln!("[STAM fromxml]{} child {:?}", self.debugindent, child);
1458                    }
1459                    if child.is_text() && element_config.text == Some(true) {
1460                        // extract the actual element text
1461                        // this may trigger multiple times if the XML element (`node`) has mixed content
1462
1463                        let mut innertext = child.text().expect("text node must have text");
1464                        let mut pending_whitespace = false;
1465                        let mut leading_whitespace = false;
1466                        if whitespace == XmlWhitespaceHandling::Collapse && !innertext.is_empty() {
1467                            // analyse what kind of whitespace we are dealing with
1468                            let mut all_whitespace = true;
1469                            leading_whitespace = innertext.chars().next().unwrap().is_whitespace();
1470
1471                            // any pending whitespace after this elements is 'buffered' in this boolean
1472                            // and only written out depending on the next text's whitespace situation
1473                            pending_whitespace = innertext
1474                                .chars()
1475                                .inspect(|c| {
1476                                    if !c.is_whitespace() {
1477                                        all_whitespace = false
1478                                    }
1479                                })
1480                                .last()
1481                                .unwrap()
1482                                .is_whitespace();
1483                            if all_whitespace {
1484                                self.pending_whitespace = true;
1485                                if self.config.debug {
1486                                    eprintln!(
1487                                        "[STAM fromxml]{} ^- all whitespace, flag pending whitespace and skipping...",
1488                                        self.debugindent,
1489                                    );
1490                                }
1491                                continue;
1492                            }
1493                            innertext = innertext.trim();
1494                            if self.config.debug {
1495                                eprintln!(
1496                                    "[STAM fromxml]{} ^- collapsed whitespace: {:?}",
1497                                    self.debugindent,
1498                                    innertext
1499                                );
1500                            }
1501                        }
1502                        if self.pending_whitespace || leading_whitespace {
1503                            //output any pending whitespace
1504                            if !self.text.is_empty()
1505                                && !self.text.chars().rev().next().unwrap().is_whitespace()
1506                            {
1507                                if self.config.debug {
1508                                    eprintln!("[STAM fromxml]{} ^- outputting pending whitespace",self.debugindent);
1509                                }
1510                                self.text.push(' ');
1511                                self.cursor += 1;
1512                                if firsttext && self.pending_whitespace {
1513                                    begin += 1;
1514                                    bytebegin += 1;
1515                                    firsttext = false;
1516                                }
1517                            }
1518                            self.pending_whitespace = false;
1519                        }
1520
1521                        // finally we output the actual text, and advance the cursor
1522                        if whitespace == XmlWhitespaceHandling::Collapse {
1523                            let mut prevc = ' ';
1524                            let mut innertext = innertext.replace(|c: char| c.is_whitespace(), " ");
1525                            innertext.retain(|c| {
1526                                let do_retain = c != ' ' || prevc != ' ';
1527                                prevc = c;
1528                                do_retain
1529                            });
1530                            self.text += &innertext;
1531                            self.cursor += innertext.chars().count();
1532                            if self.config.debug {
1533                                eprintln!("[STAM fromxml]{} ^- outputting text child (collapsed whitespace), cursor is now {}: {}",self.debugindent, self.cursor, innertext);
1534                            }
1535                        } else {
1536                            self.text += &innertext;
1537                            self.cursor += innertext.chars().count();
1538                            if self.config.debug {
1539                                eprintln!("[STAM fromxml]{} ^- outputting text child, cursor is now {}: {}",self.debugindent, self.cursor, innertext);
1540                            }
1541                        }
1542                        self.pending_whitespace = pending_whitespace;
1543                    } else if child.is_element() {
1544                        if self.config.debug {
1545                            eprintln!("[STAM fromxml]{} \\- extracting text for this child", self.debugindent);
1546                        }
1547                        self.debugindent.push_str("  ");
1548                        // recursion step, process child element, pass our whitespace handling mode since it may inherit it
1549                        let mut path = path.clone();
1550                        let count = elder_siblings.count(&child);
1551                        path.add(&child, Some(count));
1552                        self.extract_element_text(child, &path, whitespace, resource_id, inputfile, doc_num)?;
1553                        self.debugindent.pop();
1554                        self.debugindent.pop();
1555                    } else {
1556                        if self.config.debug {
1557                            eprintln!("[STAM fromxml]{} ^- skipping this child node", self.debugindent);
1558                        }
1559                        continue;
1560                    }
1561                }
1562
1563                // process the text suffix, a preconfigured string of text to include after to the actual text
1564                self.process_textsuffix(element_config, node, resource_id, inputfile, doc_num, &mut end_discount, &mut end_bytediscount, textbegin)?;
1565
1566                // Assign a scope ID if provided (used for constraining the scope of marker annotations later on)
1567                if let Some(scope_id) = element_config.scope_id.as_ref() {
1568                    self.scopes.insert( scope_id.clone(), (doc_num, node.id()) );
1569                }
1570            } else if element_config.annotation == XmlAnnotationHandling::TextSelectorBetweenMarkers
1571            {
1572                // this is a marker, keep track of it so we can extract the span between markers in [`extract_element_annotation()`] later
1573                if self.config.debug {
1574                    eprintln!("[STAM fromxml]{} adding to markers (textprefix={:?}, textsuffix={:?})", self.debugindent, element_config.textprefix, element_config.textsuffix);
1575                }
1576
1577
1578                self.markers
1579                    .entry(element_config.hash())
1580                    .and_modify(|v| v.push((doc_num, node.id())))
1581                    .or_insert(vec![(doc_num, node.id())]);
1582
1583                // for markers it doesn't matter whether something text is defined as a prefix or suffix, it's functionally the same because a marker has no text itself
1584
1585                self.process_textprefix(element_config, node, resource_id, inputfile, doc_num, &mut begin, &mut bytebegin)?;
1586                self.process_textsuffix(element_config, node, resource_id, inputfile, doc_num, &mut end_discount, &mut end_bytediscount, self.cursor)?;
1587            }
1588        } else if self.config.debug {
1589            eprintln!(
1590                "[STAM fromxml]{} WARNING: no match, skipping text extraction for element {}",
1591                self.debugindent,
1592                path
1593            );
1594        }
1595
1596        // Last, we store the new text offsets for this element/node so
1597        // we can use it in [`extract_element_annotation()`] to associate
1598        // actual annotations with this span.
1599        if begin <= (self.cursor - end_discount) {
1600            let offset = Offset::simple(begin, self.cursor - end_discount);
1601            if self.config.debug {
1602                eprintln!(
1603                    "[STAM fromxml]{} extracted text for {} @{:?}: {:?}",
1604                    self.debugindent,
1605                    path,
1606                    &offset,
1607                    &self.text[bytebegin..(self.text.len() - end_bytediscount)]
1608                );
1609            }
1610            self.positionmap.insert((doc_num, node.id(), PositionType::Body), offset);
1611            self.bytepositionmap
1612                .insert((doc_num, node.id(), PositionType::Body), (bytebegin, self.text.len() - end_bytediscount));
1613        }
1614        Ok(())
1615    }
1616
1617    /// process the text prefix, a text template to include prior to the actual text
1618    fn process_textprefix<'b>(
1619        &mut self,
1620        element_config: &XmlElementConfig,
1621        node: Node<'a,'b>,
1622        resource_id: Option<&str>,
1623        inputfile: Option<&str>,
1624        doc_num: usize,
1625        begin: &mut usize,
1626        bytebegin: &mut usize
1627    ) -> Result<(), XmlConversionError> {
1628        if let Some(textprefix) = &element_config.textprefix {
1629            self.pending_whitespace = false;
1630            if self.config.debug {
1631                eprintln!("[STAM fromxml]{} outputting textprefix: {:?}", self.debugindent, textprefix);
1632            }
1633            let result =
1634                self.render_template(textprefix, &node, Some(self.cursor), None, resource_id, inputfile, doc_num)
1635                    .map_err(|e| match e {
1636                        XmlConversionError::TemplateError(s, e) => {
1637                            XmlConversionError::TemplateError(
1638                                format!(
1639                                "whilst rendering textprefix template '{}' for node '{}': {}",
1640                                textprefix, node.tag_name().name(), s
1641                            ),
1642                                e,
1643                            )
1644                        }
1645                        e => e,
1646                    })?;
1647            let result_charlen = result.chars().count();
1648
1649            if !element_config.annotatetextprefix.is_empty() {
1650                //record the offsets for textprefix annotation later
1651                let offset = Offset::simple(self.cursor, self.cursor + result_charlen);
1652                self.positionmap.insert((doc_num, node.id(), PositionType::TextPrefix), offset);
1653                self.bytepositionmap
1654                    .insert((doc_num, node.id(), PositionType::TextPrefix), (*bytebegin, *bytebegin + result.len()));
1655            }
1656
1657            self.cursor += result_charlen;
1658            self.text += &result;
1659
1660            if element_config.include_textprefix != Some(true) {
1661                // the textprefix will not be part of the annotation's text selection, increment the offsets:
1662                *begin += result_charlen;
1663                *bytebegin += result.len();
1664            }
1665        }
1666        Ok(())
1667    }
1668
1669    /// process the text suffix, a preconfigured string of text to include after to the actual text
1670    fn process_textsuffix<'b>(
1671        &mut self,
1672        element_config: &XmlElementConfig,
1673        node: Node<'a,'b>,
1674        resource_id: Option<&str>,
1675        inputfile: Option<&str>,
1676        doc_num: usize,
1677        end_discount: &mut usize,
1678        end_bytediscount: &mut usize,
1679        textbegin: usize,
1680    ) -> Result<(), XmlConversionError> {
1681        if let Some(textsuffix) = &element_config.textsuffix {
1682            if self.config.debug {
1683                eprintln!("[STAM fromxml]{} outputting textsuffix: {:?}", self.debugindent, textsuffix);
1684            }
1685            let result = self.render_template(
1686                textsuffix.as_str(),
1687                &node,
1688                Some(textbegin),
1689                Some(self.cursor),
1690                resource_id,
1691                inputfile,
1692                doc_num
1693            ).map_err(|e| match e {
1694                    XmlConversionError::TemplateError(s, e) => {
1695                        XmlConversionError::TemplateError(
1696                            format!(
1697                                "whilst rendering textsuffix template '{}' for node '{}': {}",
1698                                textsuffix,
1699                                node.tag_name().name(),
1700                                s
1701                            ),
1702                            e,
1703                        )
1704                    }
1705                    e => e,
1706            })?;
1707            let end_discount_tmp = result.chars().count();
1708            let end_bytediscount_tmp = result.len();
1709
1710
1711            self.text += &result;
1712
1713            if !element_config.annotatetextsuffix.is_empty() {
1714                //record the offsets for textsuffix annotation later
1715                let offset = Offset::simple(self.cursor, self.cursor + end_discount_tmp);
1716                self.positionmap.insert((doc_num, node.id(), PositionType::TextSuffix), offset);
1717                self.bytepositionmap
1718                    .insert((doc_num, node.id(), PositionType::TextSuffix), (self.text.len() - end_bytediscount_tmp, self.text.len()));
1719            }
1720
1721            self.cursor += end_discount_tmp;
1722            self.pending_whitespace = false;
1723
1724            if element_config.include_textsuffix == Some(true) {
1725                // the textsuffix will be part of the annotation's text selection, no discount for later
1726                *end_discount = 0;
1727                *end_bytediscount = 0;
1728            } else {
1729                // the textsuffix will not be part of the annotation's text selection, set discounts for later
1730                *end_discount = end_discount_tmp;
1731                *end_bytediscount = end_bytediscount_tmp;
1732            }
1733        }
1734        Ok(())
1735    }
1736
1737    /// extract annotations from the XML document
1738    /// according to the mapping configuration and creates a STAM TextResource for it.
1739    /// The text, for the full document, must have already been extracted earlier with [`extract_element_text()`].
1740    /// This relies on the exact offsets per element/node computed earlier during text extraction (`positionmap`).
1741    fn extract_element_annotation<'b>(
1742        &mut self,
1743        node: Node<'a,'b>,
1744        path: &NodePath<'a,'b>,
1745        inputfile: Option<&str>,
1746        doc_num: usize,
1747        store: &mut AnnotationStore,
1748    ) -> Result<(), XmlConversionError> {
1749        if self.config.debug {
1750            eprintln!("[STAM fromxml]{} extracting annotation from {}", self.debugindent, path);
1751        }
1752
1753        let mut elder_siblings = SiblingCounter::default();
1754
1755        // obtain the configuration that applies to this element
1756        if let Some(element_config) = self.config.element_config(node, &path) {
1757            if self.config.debug {
1758                eprintln!("[STAM fromxml]{} matching config: {:?}", self.debugindent, element_config);
1759            }
1760            if element_config.annotation != XmlAnnotationHandling::None
1761                && element_config.annotation != XmlAnnotationHandling::Unspecified
1762            {
1763                let mut builder = AnnotationBuilder::new();
1764
1765                //prepare variables to pass to the template context
1766                let offset = self.positionmap.get(&(doc_num, node.id(), PositionType::Body));
1767                if element_config.annotation == XmlAnnotationHandling::TextSelector {
1768                    if let Some((beginbyte, endbyte)) = self.bytepositionmap.get(&(doc_num, node.id(), PositionType::Body)) {
1769                        if self.config.debug {
1770                            eprintln!("[STAM fromxml]{} annotation covers text {:?} (bytes {}-{})", self.debugindent, offset, beginbyte, endbyte);
1771                        }
1772                    }  else if self.text.is_empty() {
1773                        return Err(XmlConversionError::ConfigError("Can't extract annotations on text if no text was extracted!".into()));
1774                    }
1775                }
1776                let begin = if let Some(offset) = offset {
1777                    if let Cursor::BeginAligned(begin) = offset.begin {
1778                        Some(begin)
1779                    } else {
1780                        None
1781                    }
1782                } else {
1783                    None
1784                };
1785                let end = if let Some(offset) = offset {
1786                    if let Cursor::BeginAligned(end) = offset.end {
1787                        Some(end)
1788                    } else {
1789                        None
1790                    }
1791                } else {
1792                    None
1793                };
1794
1795                let resource_id = if let Some(resource_handle) = self.resource_handle {
1796                    store.resource(resource_handle).unwrap().id()
1797                } else {
1798                    None
1799                };
1800
1801                let mut have_id = false;
1802                if let Some(template) = &element_config.id {
1803                    let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1804                    let compiled_template = self.template_engine.template(template.as_str());
1805                    let id = compiled_template.render(&context).to_string().map_err(|e| 
1806                            XmlConversionError::TemplateError(
1807                                format!(
1808                                    "whilst rendering id template '{}' for node '{}'",
1809                                    template,
1810                                    node.tag_name().name(),
1811                                ),
1812                                Some(e),
1813                            )
1814                        )?;
1815                    if !id.is_empty() {
1816                        builder = builder.with_id(id);
1817                        have_id = true;
1818                    }
1819                }
1820
1821                if !have_id {
1822                    //generate a random ID if we have none
1823                    if let Some(resource_id) = resource_id {
1824                        builder = builder.with_id(stam::generate_id(&format!("{}-",resource_id), ""));
1825                    } else {
1826                        builder = builder.with_id(stam::generate_id("", ""));
1827                    }
1828                }
1829
1830                builder = self.add_annotationdata_to_builder(element_config.annotationdata.iter(), builder, node.clone(), begin, end, resource_id, inputfile, doc_num)?;
1831
1832
1833                if self.config.provenance  && inputfile.is_some() {
1834                    let path_string = if let Some(id) = node.attribute((NS_XML,"id")) {
1835                        //node has an ID, use that
1836                        format!("//{}[@xml:id=\"{}\"]", self.get_node_name_for_xpath(&node), id)
1837                    } else {
1838                        //no ID, use full XPath expression
1839                        path.format_as_xpath(&self.prefixes)
1840                    };
1841                    let databuilder = AnnotationDataBuilder::new().with_dataset(CONTEXT_ANNO.into()).with_key("target".into()).with_value(
1842                        BTreeMap::from([
1843                            ("source".to_string(),inputfile.unwrap().into()),
1844                            ("selector".to_string(), 
1845                                    BTreeMap::from([
1846                                        ("type".to_string(),"XPathSelector".into()),
1847                                        ("value".to_string(),path_string.into())
1848                                    ]).into()
1849                            )
1850                        ]).into()
1851                    );
1852                    builder = builder.with_data_builder(databuilder);
1853                }
1854
1855
1856                // Finish the builder and add the actual annotation to the store, according to its element handling
1857                match element_config.annotation {
1858                    XmlAnnotationHandling::TextSelector => {
1859                        // Annotation is on text, translates to TextSelector
1860                        if let Some(selector) = self.textselector(node, doc_num, PositionType::Body) {
1861                            builder = builder.with_target(selector);
1862                            if self.config.debug {
1863                                eprintln!("[STAM fromxml]   builder AnnotateText: {:?}", builder);
1864                            }
1865                            store.annotate(builder)?;
1866                        }
1867                        if !element_config.annotatetextprefix.is_empty() || !element_config.annotatetextsuffix.is_empty() {
1868                            self.annotate_textaffixes(node, element_config, inputfile, doc_num, store)?;
1869                        }
1870                    }
1871                    XmlAnnotationHandling::ResourceSelector => {
1872                        // Annotation is metadata, translates to ResourceSelector
1873                        builder = builder.with_target(SelectorBuilder::ResourceSelector(
1874                            self.resource_handle.into(),
1875                        ));
1876                        if self.config.debug {
1877                            eprintln!("[STAM fromxml]   builder AnnotateResource: {:?}", builder);
1878                        }
1879                        store.annotate(builder)?;
1880                    }
1881                    XmlAnnotationHandling::TextSelectorBetweenMarkers => {
1882                        // Annotation is on a text span *between* two marker elements
1883                        if let Some(selector) =
1884                            self.textselector_for_markers(node, doc_num, store, element_config)
1885                        {
1886                            builder = builder.with_target(selector);
1887                            if self.config.debug {
1888                                eprintln!(
1889                                    "[STAM fromxml]   builder TextSelectorBetweenMarkers: {:?}",
1890                                    builder
1891                                );
1892                            }
1893                            store.annotate(builder)?;
1894                            if !element_config.annotatetextprefix.is_empty() || !element_config.annotatetextsuffix.is_empty() {
1895                                self.annotate_textaffixes(node, element_config, inputfile, doc_num, store)?;
1896                            }
1897                        }
1898                    }
1899                    _ => panic!(
1900                        "Invalid annotationhandling: {:?}",
1901                        element_config.annotation
1902                    ),
1903                }
1904            }
1905
1906            // Recursion step
1907            if element_config.stop == Some(false) || element_config.stop.is_none() {
1908                for child in node.children() {
1909                    if child.is_element() {
1910                        self.debugindent.push_str("  ");
1911                        let mut path = path.clone();
1912                        let count = elder_siblings.count(&child);
1913                        path.add(&child, Some(count));
1914                        //eprintln!("DEBUG: count={}, child={:?}, parent={:?}, elder_siblings={:?}", count, child.tag_name(), node.tag_name(), elder_siblings);
1915                        self.extract_element_annotation(child, &path, inputfile, doc_num, store)?;
1916                        self.debugindent.pop();
1917                        self.debugindent.pop();
1918                    }
1919                }
1920            }
1921        } else {
1922            eprintln!(
1923                "[STAM fromxml]{} WARNING: no match, skipping annotation extraction for element {}",
1924                self.debugindent,
1925                path
1926            );
1927        }
1928        Ok(())
1929    }
1930
1931    fn add_annotationdata_to_builder<'input>(&self, iter: impl Iterator<Item = &'a XmlAnnotationDataConfig>,
1932        mut builder: AnnotationBuilder<'a>,
1933        node: Node<'a, 'input>,
1934        begin: Option<usize>,
1935        end: Option<usize>,
1936        resource_id: Option<&str>,
1937        inputfile: Option<&str>,
1938        doc_num: usize,
1939    ) -> Result<AnnotationBuilder<'a>, XmlConversionError> {
1940        for annotationdata in iter {
1941            let mut databuilder = AnnotationDataBuilder::new();
1942            if let Some(template) = &annotationdata.set {
1943                let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1944                let compiled_template = self.template_engine.template(template.as_str());
1945                let dataset = compiled_template.render(&context).to_string().map_err(|e| 
1946                        XmlConversionError::TemplateError(
1947                            format!(
1948                                "whilst rendering annotationdata/dataset template '{}' for node '{}'",
1949                                template,
1950                                node.tag_name().name(),
1951                            ),
1952                            Some(e),
1953                        )
1954                    )?;
1955                if !dataset.is_empty() {
1956                    databuilder = databuilder.with_dataset(dataset.into())
1957                }
1958            } else {
1959                databuilder =
1960                    databuilder.with_dataset(self.config.default_set.as_str().into());
1961            }
1962            if let Some(template) = &annotationdata.key {
1963                let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1964                let compiled_template = self.template_engine.template(template.as_str());
1965                match compiled_template.render(&context).to_string().map_err(|e| 
1966                        XmlConversionError::TemplateError(
1967                            format!(
1968                                "whilst rendering annotationdata/key template '{}' for node '{}'",
1969                                template,
1970                                node.tag_name().name(),
1971                            ),
1972                            Some(e),
1973                        )
1974                    )  {
1975                    Ok(key) if !key.is_empty() =>
1976                        databuilder = databuilder.with_key(key.into()) ,
1977                    Ok(_) if !annotationdata.skip_if_missing => {
1978                        return Err(XmlConversionError::TemplateError(
1979                            format!(
1980                                "whilst rendering annotationdata/key template '{}' for node '{}'",
1981                                template,
1982                                node.tag_name().name(),
1983                            ),
1984                            None
1985                        ));
1986                    },
1987                    Err(e) if !annotationdata.skip_if_missing => {
1988                        return Err(e)
1989                    },
1990                    _ => {
1991                        //skip whole databuilder if missing
1992                        continue
1993                    }
1994                }
1995            }
1996            if let Some(value) = &annotationdata.value {
1997                match self.extract_value(value,  node, annotationdata.allow_empty_value, annotationdata.skip_if_missing, annotationdata.valuetype.as_ref().map(|s| s.as_str()), begin, end, resource_id, inputfile, doc_num)? {
1998                    Some(DataValue::List(values)) if annotationdata.multiple => {
1999                        for value in values {
2000                            let mut databuilder_multi = databuilder.clone();
2001                            databuilder_multi = databuilder_multi.with_value(value);
2002                            builder = builder.with_data_builder(databuilder_multi);
2003                        }
2004                    },
2005                    Some(value) => {
2006                        databuilder = databuilder.with_value(value);
2007                    },
2008                    None =>  {
2009                        //skip whole databuilder if missing
2010                        continue
2011                    }
2012                }
2013            }
2014            if !annotationdata.multiple {
2015                builder = builder.with_data_builder(databuilder);
2016            }
2017        }
2018        Ok(builder)
2019    }
2020
2021    /// Annotates textprefix and textsuffix, if applicable
2022    fn annotate_textaffixes<'b>(
2023        &mut self,
2024        node: Node<'a,'b>,
2025        element_config: &XmlElementConfig,
2026        inputfile: Option<&str>,
2027        doc_num: usize,
2028        store: &mut AnnotationStore,
2029    ) -> Result<(), XmlConversionError> {
2030
2031
2032        if !element_config.annotatetextprefix.is_empty() {
2033            let mut builder = AnnotationBuilder::new().with_id(stam::generate_id("textprefix-", ""));
2034            if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), PositionType::TextPrefix)) {
2035                let begin = if let Cursor::BeginAligned(begin) = offset.begin {
2036                        Some(begin)
2037                    } else {
2038                        None
2039                    };
2040                let end = if let Cursor::BeginAligned(end) = offset.end {
2041                        Some(end)
2042                    } else {
2043                        None
2044                    };
2045                builder = self.add_annotationdata_to_builder(element_config.annotatetextprefix.iter(), builder, node.clone(), begin,end, None, inputfile, doc_num)?; //MAYBE TODO: pass resource_id
2046                if let Some(selector) = self.textselector(node, doc_num, PositionType::TextPrefix) {
2047                    builder = builder.with_target(selector);
2048                    if self.config.debug {
2049                        eprintln!("[STAM fromxml]   builder AnnotateText: {:?}", builder);
2050                    }
2051                    store.annotate(builder)?;
2052                } else {
2053                    return Err(XmlConversionError::ConfigError("Failed to create textselector to target textprefix".into()));
2054                }
2055            }
2056        }
2057
2058        if !element_config.annotatetextsuffix.is_empty() {
2059            let mut builder = AnnotationBuilder::new().with_id(stam::generate_id("textsuffix-", ""));
2060            if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), PositionType::TextSuffix)) {
2061                let begin = if let Cursor::BeginAligned(begin) = offset.begin {
2062                        Some(begin)
2063                    } else {
2064                        None
2065                    };
2066                let end = if let Cursor::BeginAligned(end) = offset.end {
2067                        Some(end)
2068                    } else {
2069                        None
2070                    };
2071                builder = self.add_annotationdata_to_builder(element_config.annotatetextsuffix.iter(), builder, node.clone(), begin,end, None, inputfile, doc_num)?; //MAYBE TODO: pass resource_id
2072                if let Some(selector) = self.textselector(node, doc_num, PositionType::TextSuffix) {
2073                    builder = builder.with_target(selector);
2074                    if self.config.debug {
2075                        eprintln!("[STAM fromxml]   builder AnnotateText: {:?}", builder);
2076                    }
2077                    store.annotate(builder)?;
2078                } else {
2079                    return Err(XmlConversionError::ConfigError("Failed to create textselector to target textprefix".into()));
2080                }
2081            }
2082        }
2083        Ok(())
2084    }
2085
2086    /// Extract values, running the templating engine in case of string values
2087    fn extract_value<'b>(&self, value: &'a toml::Value, node: Node<'a,'b>, allow_empty_value: bool, skip_if_missing: bool, valuetype: Option<&str>, begin: Option<usize>, end: Option<usize>, resource_id: Option<&str>, inputfile: Option<&str>, doc_num: usize) -> Result<Option<DataValue>, XmlConversionError>{
2088        match value {
2089            toml::Value::String(template) => {  
2090                let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
2091                /*
2092                if self.config.debug() {
2093                    eprintln!(
2094                        "[STAM fromxml]              Context for annotationdata/map template '{}' for node '{}': {:?}",
2095                        template,
2096                        node.tag_name().name(),
2097                        context
2098                    );
2099                }
2100                */
2101                let compiled_template = self.template_engine.template(template.as_str()); //panics if doesn't exist, but that can't happen
2102                match compiled_template.render(&context).to_string().map_err(|e| 
2103                        XmlConversionError::TemplateError(
2104                            format!(
2105                                "whilst rendering annotationdata/map template '{}' for node '{}'.{}",
2106                                template,
2107                                node.tag_name().name(),
2108                                if self.config.debug() {
2109                                    format!("\nContext was {:?}.\nVariables are: {:?}", context, self.variables.get(template))
2110                                } else {
2111                                    String::new()
2112                                }
2113                            ),
2114                            Some(e),
2115                        )
2116                    )  {
2117                    Ok(value) => {
2118                        if !value.is_empty() || allow_empty_value {
2119                            string_to_datavalue(value, valuetype).map(|v| Some(v))
2120                        } else {
2121                            //skip
2122                            Ok(None)
2123                        }
2124                    },
2125                    Err(e) if !skip_if_missing => {
2126                        Err(e)
2127                    },
2128                    Err(_) if allow_empty_value => {
2129                        Ok(Some("".into()))
2130                    },
2131                    Err(_) => {
2132                        //skip whole databuilder if missing
2133                        Ok(None)
2134                    }
2135                }
2136            },
2137            toml::Value::Table(map) => {
2138                let mut resultmap: BTreeMap<String,DataValue> = BTreeMap::new();
2139                for (key, value) in map.iter() {
2140                    if let Some(value) = self.extract_value(value,  node, false, true, None, begin, end, resource_id, inputfile, doc_num)? {
2141                        resultmap.insert(key.clone(), value);
2142                    }
2143                }
2144                Ok(Some(resultmap.into()))
2145            },
2146            toml::Value::Array(list) => {
2147                let mut resultlist: Vec<DataValue> = Vec::new();
2148                for value in list.iter() {
2149                    if let Some(value) = self.extract_value(value, node, false, true, None,  begin, end, resource_id, inputfile, doc_num)? {
2150                        resultlist.push(value);
2151                    }
2152                }
2153                Ok(Some(resultlist.into()))
2154            }
2155            toml::Value::Boolean(v) => Ok(Some(DataValue::Bool(*v))),
2156            toml::Value::Float(v) => Ok(Some(DataValue::Float(*v))),
2157            toml::Value::Integer(v) => Ok(Some(DataValue::Int(*v as isize))),
2158            toml::Value::Datetime(_v) => {
2159                todo!("fromxml: Datetime conversion not implemented yet");
2160            }
2161        }
2162    }
2163
2164    /// Extract values for metadata (no associated node), running the templating engine in case of string values
2165    fn extract_value_metadata<'b>(&self, value: &'a toml::Value, context: &upon::Value, allow_empty_value: bool, skip_if_missing: bool, resource_id: Option<&str>) -> Result<Option<DataValue>, XmlConversionError>{
2166        match value {
2167            toml::Value::String(template) => {  
2168                let compiled_template = self.template_engine.template(template.as_str()); //panics if doesn't exist, but that can't happen
2169                match compiled_template.render(&context).to_string().map_err(|e| 
2170                        XmlConversionError::TemplateError(
2171                            format!(
2172                                "whilst rendering annotationdata/metadata template '{}' for metadata",
2173                                template,
2174                            ),
2175                            Some(e),
2176                        )
2177                    )  {
2178                    Ok(value) => {
2179                        if !value.is_empty() || allow_empty_value {
2180                            Ok(Some(value.into()))
2181                        } else {
2182                            //skip
2183                            Ok(None)
2184                        }
2185                    },
2186                    Err(e) if !skip_if_missing => {
2187                        Err(e)
2188                    },
2189                    Err(_) if allow_empty_value => {
2190                        Ok(Some("".into()))
2191                    },
2192                    Err(_) => {
2193                        //skip whole databuilder if missing
2194                        Ok(None)
2195                    }
2196                }
2197            },
2198            toml::Value::Table(map) => {  
2199                let mut resultmap: BTreeMap<String,DataValue> = BTreeMap::new();
2200                for (key, value) in map.iter() {
2201                    if let Some(value) = self.extract_value_metadata(value, context, false, true,  resource_id)? {
2202                        resultmap.insert(key.clone(), value);
2203                    }
2204                }
2205                Ok(Some(resultmap.into()))
2206            },
2207            toml::Value::Array(list) => {  
2208                let mut resultlist: Vec<DataValue> = Vec::new();
2209                for value in list.iter() {
2210                    if let Some(value) = self.extract_value_metadata(value, context, false, true, resource_id)? {
2211                        resultlist.push(value);
2212                    }
2213                }
2214                Ok(Some(resultlist.into()))
2215            }
2216            toml::Value::Boolean(v) => Ok(Some(DataValue::Bool(*v))),
2217            toml::Value::Float(v) => Ok(Some(DataValue::Float(*v))),
2218            toml::Value::Integer(v) => Ok(Some(DataValue::Int(*v as isize))),
2219            toml::Value::Datetime(_v) => {
2220                todo!("fromxml: Datetime conversion not implemented yet");
2221            }
2222        }
2223    }
2224
2225    /// Select text corresponding to the element/node and document number
2226    fn textselector<'s>(&'s self, node: Node, doc_num: usize, positiontype: PositionType) -> Option<SelectorBuilder<'s>> {
2227        let res_handle = self.resource_handle.expect("resource must be associated");
2228        if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), positiontype)) {
2229            Some(SelectorBuilder::TextSelector(
2230                BuildItem::Handle(res_handle),
2231                offset.clone(),
2232            ))
2233        } else {
2234            None
2235        }
2236    }
2237
2238    /// Select text between this element/node and the next of the same type
2239    fn textselector_for_markers<'b>(
2240        &self,
2241        node: Node,
2242        doc_num: usize,
2243        store: &AnnotationStore,
2244        element_config: &'b XmlElementConfig,
2245    ) -> Option<SelectorBuilder<'b>> {
2246        let resource = store
2247            .resource(
2248                self.resource_handle
2249                    .expect("resource must have been created"),
2250            )
2251            .expect("resource must exist");
2252        let mut end: Option<usize> = None;
2253        if let Some(markers) = self.markers.get(&element_config.hash()) {
2254            let mut grab = false;
2255            for (d_num, n_id) in markers.iter() {
2256                if grab {
2257                    //this marker is the next one, it's begin position is our desired end position
2258                    end = self.positionmap.get(&(*d_num, *n_id, PositionType::Body)).map(|offset| {
2259                        offset
2260                            .begin
2261                            .try_into()
2262                            .expect("begin cursor must be beginaligned")
2263                    });
2264                    break;
2265                }
2266                if doc_num == *d_num && *n_id == node.id() {
2267                    //current node/marker found, signal grab for the next one
2268                    grab = true;
2269                }
2270            }
2271        };
2272        if end.is_none() {
2273            //no next marker found, find the end
2274            //are we in a restricted scope?
2275            if let Some(scope) = element_config.marker_scope.as_deref() {
2276                if let Some((d_num, n_id)) = self.scopes.get(scope) {
2277                    end = self.positionmap.get(&(*d_num, *n_id, PositionType::Body)).map(|offset| {
2278                        offset
2279                            .end
2280                            .try_into()
2281                            .expect("end cursor must be beginaligned")
2282                    });
2283                } else {
2284                    eprintln!("WARNING: Undefined scope referenced in marker_scope: {}, no matching text with this `scope_id` in this document! Skipping last marker!", scope);
2285                    return None;
2286                }
2287            } else {
2288                //just use end of document instead
2289                end = Some(resource.textlen());
2290            }
2291        }
2292        if let (Some(offset), Some(end)) = (self.positionmap.get(&(doc_num, node.id(), PositionType::Body)), end) {
2293            Some(SelectorBuilder::TextSelector(
2294                BuildItem::Handle(self.resource_handle.unwrap()),
2295                Offset::simple(
2296                    offset
2297                        .begin
2298                        .try_into()
2299                        .expect("begin cursor must be beginaligned"),
2300                    end,
2301                ),
2302            ))
2303        } else {
2304            None
2305        }
2306    }
2307
2308    fn set_global_context(&mut self) {
2309        self.global_context
2310            .insert("context".into(), upon::Value::Map(self.config.context.iter().map(|(k,v)| (k.clone(), map_value(v))).collect()));
2311        self.global_context
2312            .insert("namespaces".into(), self.config.namespaces.clone().into());
2313        self.global_context
2314            .insert("default_set".into(), self.config.default_set.clone().into());
2315    }
2316
2317    fn render_template<'input, 't>(
2318        &self,
2319        template: &'t str,
2320        node: &Node<'a, 'input>,
2321        begin: Option<usize>,
2322        end: Option<usize>,
2323        resource: Option<&str>,
2324        inputfile: Option<&str>,
2325        doc_num: usize,
2326    ) -> Result<Cow<'t, str>, XmlConversionError> {
2327        if template.chars().any(|c| c == '{') {
2328            //value is a template, templating engine probably needed
2329            let compiled_template = self.template_engine.template(template);
2330            let context = self.context_for_node(&node, begin, end, template, resource, inputfile, doc_num);
2331            let result = compiled_template.render(context).to_string()?;
2332            Ok(Cow::Owned(result))
2333        } else {
2334            //value is a literal: templating engine not needed
2335            Ok(Cow::Borrowed(template))
2336        }
2337    }
2338
2339    fn context_for_node<'input>(
2340        &self,
2341        node: &Node<'a, 'input>,
2342        begin: Option<usize>,
2343        end: Option<usize>,
2344        template: &str, 
2345        resource: Option<&str>,
2346        inputfile: Option<&str>,
2347        doc_num: usize,
2348    ) -> upon::Value {
2349        let mut context = self.global_context.clone();
2350        let length = if let (Some(begin), Some(end)) = (begin, end) {
2351            Some(end - begin)
2352        } else {
2353            None
2354        };
2355        context.insert("localname".into(), node.tag_name().name().into());
2356        //name with name prefix (if any)
2357        context.insert("name".into(), self.get_node_name_for_template(node).into());
2358        if let Some(namespace) = node.tag_name().namespace() {
2359            //the full namespace
2360            context.insert("namespace".into(), namespace.into());
2361        }
2362
2363        // Offset in the untangled plain text
2364        if let Some(begin) = begin {
2365            context.insert("begin".into(), upon::Value::Integer(begin as i64));
2366        }
2367        if let Some(end) = end {
2368            context.insert("end".into(), upon::Value::Integer(end as i64));
2369        }
2370        if let Some(length) = length {
2371            context.insert("length".into(), upon::Value::Integer(length as i64));
2372        }
2373        if let Some(resource) = resource {
2374            //the resource ID
2375            context.insert("resource".into(), resource.into());
2376        }
2377        if let Some(inputfile) = inputfile {
2378            //the input file
2379            context.insert("inputfile".into(), inputfile.into());
2380        }
2381        //document number (0-indexed), useful in case multiple input documents are cast to a single output text
2382        context.insert("doc_num".into(), upon::Value::Integer(doc_num as i64));
2383
2384        if let Some(vars) = self.variables.get(template) {
2385            for var in vars {
2386                let mut encodedvar = String::new();
2387                if let Some(value) = self.context_for_var(node, var, &mut encodedvar, false) {
2388                    if self.config.debug() {
2389                        eprintln!(
2390                            "[STAM fromxml]              Set context variable for template '{}' for node '{}': {}={:?}   (encodedvar={})",
2391                            template,
2392                            node.tag_name().name(),
2393                            var,
2394                            value,
2395                            encodedvar
2396                        );
2397                    }
2398                    if value != upon::Value::None {
2399                        context.insert(encodedvar, value);
2400                    }
2401                } else if self.config.debug() {
2402                    eprintln!(
2403                        "[STAM fromxml]              Missed context variable for template '{}' for node '{}': {}",
2404                        template,
2405                        node.tag_name().name(),
2406                        var
2407                    );
2408                }
2409            }
2410        }
2411        upon::Value::Map(context)
2412    }
2413
2414    /// Looks up a variable value (from the DOM XML) to be used in for template context
2415    // returns value and stores full the *encoded* variable name in path (this is safe to pass to template)
2416    // return values are temporarily aggregated in multiple if multiple elements are requested, it will be emptied automatically, the caller owns it but doesn't use it itself.
2417    fn context_for_var<'input>(
2418        &self,
2419        node: &Node<'a, 'input>,
2420        var: &str,
2421        path: &mut String,
2422        mut return_all_matches: bool,
2423    ) -> Option<upon::Value> {
2424
2425        //are we the first call by the caller or are we a recursion?
2426        let first = path.is_empty();
2427
2428        let var = if var.starts_with("?.$$") {
2429            if first {
2430                path.push_str("?.ELEMENTS_");
2431                return_all_matches = true;
2432                if self.config.debug {
2433                    eprintln!("[STAM fromxml]              will return all matches for {}", var);
2434                }
2435            };
2436            &var[4..]
2437        } else if var.starts_with("?.$") {
2438            if first {
2439                path.push_str("?.ELEMENT_");
2440            };
2441            &var[3..]
2442        } else if var.starts_with("$$") {
2443            if first {
2444                path.push_str("ELEMENTS_");
2445                return_all_matches = true;
2446                if self.config.debug {
2447                    eprintln!("[STAM fromxml]              will return all matches for {}", var);
2448                }
2449            };
2450            &var[2..]
2451        } else if var.starts_with("$") {
2452            if first {
2453                path.push_str("ELEMENT_");
2454            };
2455            &var[1..]
2456        } else if var.starts_with("?.@") {
2457            if first {
2458                path.push_str("?.");
2459            };
2460            &var[2..]
2461        } else {
2462            var
2463        };
2464
2465        if !first && !var.is_empty() && !path.ends_with("ELEMENT_") && !path.ends_with("ELEMENTS_"){
2466            path.push_str("_IN_");
2467        }
2468
2469        //get the first component of the variable
2470        let (component, remainder) = var.split_once("/").unwrap_or((var,""));
2471        //eprintln!("DEBUG: component={}, remainder={}, node={}, return_all_matches={}", component, remainder, node.tag_name().name(), return_all_matches);
2472        if component.is_empty() {
2473            if first && !remainder.is_empty() {
2474                //we're asked to start at the root node
2475                let mut n = node.clone();
2476                //find the root node
2477                while let Some(parentnode) = n.parent_element() {
2478                    n = parentnode;
2479                }
2480                //recurse from root node
2481                let (rootcomponent, remainder) = remainder.split_once("/").unwrap_or((remainder,""));
2482                let (prefix, localname)  = if let Some(pos) = rootcomponent.find(":") {
2483                    (Some(&rootcomponent[0..pos]),  &rootcomponent[pos+1..])
2484                } else {
2485                    (None, rootcomponent)
2486                };
2487                //test if root name corresponds with what we expected
2488                if localname != n.tag_name().name() && localname != "*" {
2489                    None
2490                } else {
2491                    if let Some(prefix) = prefix {
2492                        path.push_str(prefix);
2493                        path.push_str("__");
2494                    }
2495                    path.push_str(localname);
2496                    self.context_for_var(&n, remainder, path, return_all_matches)
2497                }
2498            } else {
2499                //an empty component is the stop condition , this function is called recursively, stripping one
2500                //component at a time until nothing is left, we then take the text of that final node:
2501                Some(recursive_text(node).into())
2502            }
2503        } else if component.starts_with("@"){
2504            if let Some(pos) = component.find(":") {
2505                let prefix = &component[1..pos];
2506                if let Some(ns) = self.config.namespaces.get(prefix) {
2507                    let var = &component[pos+1..];
2508                    path.push_str("ATTRIB_");
2509                    path.push_str(prefix);
2510                    path.push_str("__");
2511                    path.push_str(var);
2512                    Some(
2513                        node.attribute((ns.as_str(),var)).into()
2514                    )
2515                } else {
2516                    None
2517                }
2518            } else {
2519                let var = &component[1..];
2520                path.push_str("ATTRIB_");
2521                path.push_str(var);
2522                Some(
2523                    node.attribute(var).into()
2524                )
2525            }
2526        } else if component == ".." {
2527            if let Some(parentnode) = node.parent_element().as_ref() {
2528                //recurse with parent node
2529                path.push_str("PARENT");
2530                self.context_for_var(parentnode, remainder, path, return_all_matches)
2531            } else {
2532                None
2533            }
2534        } else if component == "." {
2535            path.push_str("THIS");
2536            if !remainder.is_empty() {
2537                //a . is meaningless if not the final component
2538                self.context_for_var(node, remainder, path, return_all_matches)
2539            } else {
2540                Some(recursive_text(node).into())
2541            }
2542        } else {
2543            let (prefix, localname)  = if let Some(pos) = component.find(":") {
2544                (Some(&component[0..pos]),  &component[pos+1..])
2545            } else {
2546                (None, component)
2547            };
2548            let localname_with_condition = localname;
2549            let (localname, condition_str, condition) = self.extract_condition(localname_with_condition); //extract X-Path like conditions [@attrib="value"]  (very limited!)
2550            //eprintln!("DEBUG: looking for {} (prefix={:?},localname={}, condition={:?}) in {:?}", localname_with_condition,  prefix, localname, condition, node.tag_name());
2551            let mut multiple_value_buffer: Vec<upon::Value> = Vec::new(); //only used when multiple == true
2552            let mut final_path: String = String::new(); //only used when multiple == true
2553            for child in node.children() {
2554                if child.is_element() {
2555                    let namedata = child.tag_name();
2556                    let mut child_matches = if let Some(namespace) = namedata.namespace() {
2557                        if let Some(foundprefix) = self.prefixes.get(namespace) {
2558                            Some(foundprefix.as_str()) == prefix && localname == namedata.name()
2559                        } else {
2560                            false
2561                        }
2562                    } else {
2563                        namedata.name() == localname
2564                    };
2565                    if child_matches {
2566                        //MAYBE TODO: move to separate funtion
2567                        if let Some((attribname, negate, attribvalue)) = condition {
2568                            //test condition: falsify child_matches
2569                            if let Some(pos) = attribname.find(":") {
2570                                let prefix = &attribname[0..pos];
2571                                if let Some(ns) = self.config.namespaces.get(prefix) {
2572                                    let attribname = &attribname[pos+1..];
2573                                    if let Some(value) = child.attribute((ns.as_str(),attribname)) {
2574                                        if !negate && attribvalue != Some(value) {
2575                                            child_matches = false;
2576                                        } else if negate && attribvalue == Some(value) {
2577                                            child_matches = false;
2578                                        }
2579                                    } else {
2580                                        child_matches = false;
2581                                    }
2582                                } else {
2583                                    child_matches = false;
2584                                }
2585                            } else {
2586                                if let Some(value) = child.attribute(attribname) {
2587                                    if !negate && attribvalue != Some(value) {
2588                                        child_matches = false;
2589                                    } else if negate && attribvalue == Some(value) {
2590                                        child_matches = false;
2591                                    }
2592                                } else {
2593                                    child_matches = false;
2594                                }
2595                            }
2596                        }
2597                        if !child_matches && self.config.debug {
2598                            eprintln!("[STAM fromxml] candidate node does not meet condition: {}", localname_with_condition);
2599                        }
2600                        //end condition test
2601                    }
2602                    if child_matches {
2603                        let prevpathlen = path.len();
2604                        //update path
2605                        if let Some(prefix) = prefix {
2606                            path.push_str(prefix);
2607                            path.push_str("__");
2608                        }
2609                        path.push_str(localname);
2610                        if condition.is_some() {
2611                            //simply encode the condition as a hash (non-decodable but that's okay)
2612                            let mut hasher = DefaultHasher::new();
2613                            condition_str.hash(&mut hasher);
2614                            let h = hasher.finish();
2615                            path.push_str(&format!("_COND{}_", h));
2616                        }
2617                        if let Some(value) = self.context_for_var(&child, remainder, path, return_all_matches) {
2618                            //success
2619                            if return_all_matches {
2620                                if let upon::Value::List(v) = value {
2621                                    multiple_value_buffer.extend(v.into_iter());
2622                                } else {
2623                                    multiple_value_buffer.push(value);
2624                                }
2625                                if final_path.is_empty() {
2626                                    final_path = path.clone();
2627                                }
2628                                //do not return yet, there may be more!
2629                            } else {
2630                                //normal behaviour, get first match
2631                                return Some(value);
2632                            }
2633                        }
2634                        //child didn't match (or we want multiple matches), truncate path again and continue search (a later child may match again!)
2635                        path.truncate(prevpathlen);
2636                    }
2637                }
2638            }
2639            if !multiple_value_buffer.is_empty() {
2640                //we found multiple values, return them
2641                if self.config.debug {
2642                    eprintln!("[STAM fromxml]              returning multiple matches of {} as list", var);
2643                }
2644                //we also return the path of the match
2645                *path = final_path;
2646                Some(multiple_value_buffer.into())
2647            } else {
2648                //no match found for this variable
2649                if self.config.debug {
2650                    eprintln!("[STAM fromxml]              returning with no match found for {} in {}", var, node.tag_name().name());
2651                }
2652                None
2653            }
2654        }
2655    }
2656
2657    fn extract_condition<'b>(&self, localname: &'b str) -> (&'b str, &'b str, Option<(&'b str, bool, Option<&'b str>)>) { //(localname, condition, Option<(attrib, negation, attribvalue)>)
2658        //simple conditional statement
2659        if localname.ends_with("]") {
2660            if let Some(pos) = localname.find("[") {
2661                let condition = &localname[pos+1..localname.len()-1];
2662                let (mut attrib, negation, attribvalue) = if let Some(pos) = condition.find("=") {
2663                     let attrib = condition[0..pos].trim();
2664                     let value = condition[pos+1..].trim();
2665                     let value = &value[1..value.len() - 1]; //strips the literal quotes (") for the value
2666                     if attrib.ends_with('!') {
2667                        //negation (!= operator)
2668                        (attrib[..attrib.len() - 1].trim(), true, Some(value))
2669                     } else {
2670                        (attrib.trim(), false, Some(value))
2671                     }
2672                } else {
2673                    (condition, false, None)
2674                };
2675                if attrib.starts_with('@') {
2676                    //this should actually be mandatory and already checked during template precompilation
2677                    attrib = &attrib[1..];
2678                }
2679                return (&localname[..pos], condition, Some((attrib,  negation,attribvalue )) );
2680            }
2681        }
2682        (localname, "", None)
2683    }
2684
2685
2686    fn get_node_name_for_template<'b>(&self, node: &'b Node) -> Cow<'b,str> {
2687        let extended_name = node.tag_name();
2688        match (extended_name.namespace(), extended_name.name()) {
2689            (Some(namespace), tagname) => {
2690                if let Some(prefix) = self.prefixes.get(namespace) {
2691                    Cow::Owned(format!("{}__{}", prefix, tagname))
2692                } else {
2693                    Cow::Borrowed(tagname)
2694                }
2695            }
2696            (None, tagname) => Cow::Borrowed(tagname),
2697        }
2698    }
2699
2700    fn get_node_name_for_xpath<'b>(&self, node: &'b Node) -> Cow<'b,str> {
2701        let extended_name = node.tag_name();
2702        match (extended_name.namespace(), extended_name.name()) {
2703            (Some(namespace), tagname) => {
2704                if let Some(prefix) = self.prefixes.get(namespace) {
2705                    Cow::Owned(format!("{}:{}", prefix, tagname))
2706                } else {
2707                    Cow::Borrowed(tagname)
2708                }
2709            }
2710            (None, tagname) => Cow::Borrowed(tagname),
2711        }
2712    }
2713
2714
2715    fn precompile(&mut self, template: &'a str) -> Cow<'a,str> {
2716        let mut replacement = String::new();
2717        let mut variables: BTreeSet<&'a str> = BTreeSet::new();
2718        let mut begin = 0;
2719        let mut end = 0;
2720        for i  in 0..template.len() {
2721            let slice = &template[i..];
2722            if slice.starts_with("{{") || slice.starts_with("{%") {
2723                begin = i;
2724            } else if slice.starts_with("}}") || slice.starts_with("%}") {
2725                if end < begin+2 {
2726                    replacement.push_str(&template[end..begin+2]);
2727                }
2728                let inner = &template[begin+2..i]; //the part without the {{  }}
2729                replacement.push_str(&self.precompile_inblock(inner, &mut variables));
2730                end = i;
2731            }
2732        }
2733        if end > 0 {
2734            replacement.push_str(&template[end..]);
2735        }
2736        self.variables.insert(template.into(), variables);
2737        //eprintln!("DEBUG: precompile({}) -> {}", template, replacement);
2738
2739        if !replacement.is_empty() {
2740            Cow::Owned(replacement)
2741        } else {
2742            Cow::Borrowed(template)
2743        }
2744    }
2745
2746    fn precompile_inblock<'s>(&self, s: &'s str, vars: &mut BTreeSet<&'s str>) -> Cow<'s,str> {
2747        let mut quoted = false;
2748        let mut var = false;
2749        let mut begin = 0;
2750        let mut end = 0;
2751        let mut replacement = String::new();
2752        let mut in_condition = false;
2753        for (i,c) in s.char_indices() {
2754            if in_condition && c != ']' {
2755                continue;
2756            }
2757            if c == '"' {
2758                quoted = !quoted;
2759            } else if !quoted {
2760                if !var && (c == '@' || c == '$') {
2761                    //token is an XML variable name, its syntax needs some changes before it can be used in the templating engine
2762                    var = true;
2763                    begin = i;
2764                } else if var && c == '[' {
2765                    in_condition = true;
2766                } else if var && in_condition && c == ']' {
2767                    //end of condition
2768                    in_condition = false;
2769                } else if var && in_condition  {
2770                    //in condition
2771                    continue;
2772                } else if var && (!c.is_alphanumeric() && c != '$' && c != '.' && c != '/' && c != '_' && c != ':' && c != '@') {
2773                    //end of variable (including condition if applicable)
2774                    if end < begin {
2775                        replacement.push_str(&s[end..begin]);
2776                    }
2777                    let varname = &s[begin..i];
2778                    vars.insert(varname);
2779                    let replacement_var = self.precompile_name(varname);
2780                    replacement += &replacement_var;
2781                    end = i;
2782                    var = false;
2783                }
2784            }
2785        }
2786        if end > 0 {
2787            replacement.push_str(&s[end..]);
2788        }
2789        if var {
2790            //don't forget last one
2791            let varname = &s[begin..];
2792            vars.insert(varname);
2793            let replacement_var = self.precompile_name(varname);
2794            replacement += &replacement_var;
2795        }
2796        if !replacement.is_empty() {
2797            //eprintln!("DEBUG: precompile_inblock({}) -> {}", s, replacement);
2798            Cow::Owned(replacement)
2799        } else {
2800            Cow::Borrowed(s)
2801        }
2802    }
2803
2804    /// upon's templating syntax doesn't support some of the characters we use in names, this function substitutes them for more verbose equivalents
2805    fn precompile_name(&self, s: &str) -> String {
2806        let mut replacement = String::new();
2807        let mut begincondition = None;
2808        let mut skip = 0;
2809        for (i,c) in s.char_indices() {
2810            if begincondition.is_some() && c != ']' {
2811                continue;
2812            } else if skip > 0 {
2813                skip -= 1;
2814                continue;
2815            }
2816            if c == '$' {
2817                let slice = &s[i..];
2818                if slice.starts_with("$$..") {
2819                    replacement.push_str("ELEMENTS_PARENT");
2820                    skip = 3;
2821                } else if slice.starts_with("$$.") {
2822                    replacement.push_str("ELEMENTS_THIS");
2823                    skip = 2;
2824                } else if slice.starts_with("$$/") {
2825                    replacement.push_str("ELEMENTS_");
2826                    skip = 2;
2827                } else if slice.starts_with("$$") {
2828                    replacement.push_str("ELEMENTS_");
2829                    skip = 1;
2830                } else if slice.starts_with("$..") {
2831                    replacement.push_str("ELEMENT_PARENT");
2832                    skip = 2;
2833                } else if slice.starts_with("$.") {
2834                    replacement.push_str("ELEMENT_THIS");
2835                    skip = 1;
2836                } else if slice.starts_with("$/") {
2837                    replacement.push_str("ELEMENT_");
2838                    skip = 1;
2839                } else {
2840                    replacement.push_str("ELEMENT_");
2841                }
2842            } else if c == '@' {
2843                replacement.push_str("ATTRIB_");
2844            } else if c == '/' {
2845                replacement.push_str("_IN_");
2846            } else if c == ':' {
2847                replacement.push_str("__");
2848            } else if c == '[' {
2849                begincondition = Some(i+1);
2850            } else if c == ']' {
2851                //conditions are just stored as hashes
2852                if let Some(begin) = begincondition {
2853                    let mut hasher = DefaultHasher::new();
2854                    let _ = &s[begin..i].hash(&mut hasher);
2855                    let h = hasher.finish();
2856                    replacement.push_str(&format!("_COND{}_", h));
2857                }
2858                begincondition = None;
2859            } else {
2860                replacement.push(c);
2861            }
2862        }
2863        //eprintln!("DEBUG: precompile_name({}) -> {}", s, replacement);
2864        replacement
2865    }
2866
    /// Creates one annotation in `store` for every `[[metadata]]` entry of the configuration.
    ///
    /// For each entry the ID, dataset, key and value templates are rendered against the global
    /// template context (extended with a `resource` variable when the resource has an ID), and
    /// the resulting annotation targets either the whole text of the resource or the resource
    /// itself, depending on the entry's `annotation` setting.
    ///
    /// # Errors
    /// Returns a `TemplateError` when a template fails to render, or when a key renders to an
    /// empty string while `skip_if_missing` is not set. Errors raised by
    /// `extract_value_metadata` and `store.annotate` are propagated as well.
    ///
    /// # Panics
    /// Panics when an entry uses an annotation handling other than `TextSelector`,
    /// `ResourceSelector`, `None` or `Unspecified`, and when `TextSelector` is requested while
    /// no resource handle is set.
    fn add_metadata(&self, store: &mut AnnotationStore) -> Result<(), XmlConversionError> {
        for metadata in self.config.metadata.iter() {
            let mut builder = AnnotationBuilder::new();

            // Public ID of the resource being converted, if there is a resource and it has one
            let resource_id = if let Some(resource_handle) = self.resource_handle {
                store.resource(resource_handle).unwrap().id()
            } else {
                None
            };

            // Each metadata entry gets its own copy of the global context, so the `resource`
            // variable can be added without touching the shared one
            let mut context = self.global_context.clone();
            if let Some(resource_id) = resource_id {
                context.insert("resource".into(), resource_id.into());
            }

            // Optional annotation ID; an ID that renders to an empty string is simply not set
            if let Some(template) = &metadata.id {
                let compiled_template = self.template_engine.template(template.as_str());
                let id = compiled_template.render(&context).to_string().map_err(|e| 
                        XmlConversionError::TemplateError(
                            format!(
                                "whilst rendering metadata id template '{}'",
                                template,
                            ),
                            Some(e),
                        )
                    )?;
                if !id.is_empty() {
                    builder = builder.with_id(id);
                }
            }

            for annotationdata in metadata.annotationdata.iter() {
                let mut databuilder = AnnotationDataBuilder::new();
                // Dataset: rendered from the `set` template when given, the configured default
                // set otherwise. Note that a template rendering to an empty string leaves the
                // dataset unset (the default is not applied in that case).
                if let Some(template) = &annotationdata.set {
                    let compiled_template = self.template_engine.template(template.as_str());
                    let dataset = compiled_template.render(&context).to_string().map_err(|e| 
                            XmlConversionError::TemplateError(
                                format!(
                                    "whilst rendering annotationdata/dataset template '{}' for metadata",
                                    template,
                                ),
                                Some(e),
                            )
                        )?;
                    if !dataset.is_empty() {
                        databuilder = databuilder.with_dataset(dataset.into())
                    }
                } else {
                    databuilder =
                        databuilder.with_dataset(self.config.default_set.as_str().into());
                }
                // Key: an empty result or a render error is fatal unless `skip_if_missing` is
                // set, in which case this data item is dropped and the loop moves on
                if let Some(template) = &annotationdata.key {
                    let compiled_template = self.template_engine.template(template.as_str());
                    match compiled_template.render(&context).to_string().map_err(|e| 
                            XmlConversionError::TemplateError(
                                format!(
                                    "whilst rendering annotationdata/key template '{}' for metadata",
                                    template,
                                ),
                                Some(e),
                            )
                        )  {
                        Ok(key) if !key.is_empty() =>
                            databuilder = databuilder.with_key(key.into()) ,
                        Ok(_) if !annotationdata.skip_if_missing => {
                            return Err(XmlConversionError::TemplateError(
                                format!(
                                    "whilst rendering annotationdata/key template '{}' metadata",
                                    template,
                                ),
                                None
                            ));
                        },
                        Err(e) if !annotationdata.skip_if_missing => {
                            return Err(e)
                        },
                        _ => {
                            //skip whole databuilder if missing
                            continue
                        }
                    }
                }
                // Value: `None` from extract_value_metadata means there is nothing to store,
                // which drops this data item as well
                if let Some(value) = &annotationdata.value {
                    match self.extract_value_metadata(value, &upon::Value::Map(context.clone()), annotationdata.allow_empty_value, annotationdata.skip_if_missing,  resource_id.as_deref())? {
                        Some(value) => {
                            databuilder = databuilder.with_value(value);
                        },
                        None =>  {
                            //skip whole databuilder if missing
                            continue
                        }
                    }
                }
                builder = builder.with_data_builder(databuilder);
            }



            // Finish the builder and add the actual annotation to the store, according to its element handling
            match metadata.annotation {
                XmlAnnotationHandling::TextSelector => {
                    // Annotation is on text, translates to TextSelector covering the whole text of the resource
                    builder = builder.with_target(SelectorBuilder::TextSelector(BuildItem::Handle(self.resource_handle.expect("resource must have handle")), Offset::whole()));
                    if self.config.debug {
                        eprintln!("[STAM fromxml]   builder AnnotateText: {:?}", builder);
                    }
                    store.annotate(builder)?;
                }
                XmlAnnotationHandling::ResourceSelector  | XmlAnnotationHandling::None | XmlAnnotationHandling::Unspecified => {
                    // Annotation is metadata (default), translates to ResourceSelector
                    builder = builder.with_target(SelectorBuilder::ResourceSelector(
                        self.resource_handle.into(),
                    ));
                    if self.config.debug {
                        eprintln!("[STAM fromxml]   builder AnnotateResource: {:?}", builder);
                    }
                    store.annotate(builder)?;
                }
                // any other handling makes no sense for metadata and is treated as a fatal error
                _ => panic!(
                    "Invalid annotationhandling for metadata: {:?}",
                    metadata.annotation
                ),
            }
        }
        Ok(())
    }
2993}
2994
2995
2996
2997/// Get recursive text without any elements
2998fn recursive_text(node: &Node) -> String {
2999    let mut s = String::new();
3000    for child in node.children() {
3001        if child.is_text() {
3002            s += child.text().expect("should have text");
3003        } else if child.is_element() {
3004            s += &recursive_text(&child);
3005        }
3006    }
3007    s
3008}
3009
3010// Filters
/// Returns a copy of `s` in which the first character is converted to uppercase; the rest of
/// the string is left untouched. The uppercase form may consist of several characters
/// (e.g. `ß` becomes `SS`). An empty input yields an empty string.
fn filter_capitalize(s: &str) -> String {
    let mut remainder = s.chars();
    match remainder.next() {
        None => String::new(),
        Some(initial) => {
            let mut capitalized = String::with_capacity(s.len());
            capitalized.extend(initial.to_uppercase());
            capitalized.push_str(remainder.as_str());
            capitalized
        }
    }
}
3022
3023fn filter_gt(a: &upon::Value, b: &upon::Value) -> bool {
3024    match (a, b) {
3025        (upon::Value::Integer(a), upon::Value::Integer(b)) => *a > *b,
3026        (upon::Value::Float(a), upon::Value::Float(b)) => *a > *b,
3027        (upon::Value::String(a), upon::Value::String(b)) => *a > *b,
3028        _ => false,
3029    }
3030}
3031
3032fn filter_lt(a: &upon::Value, b: &upon::Value) -> bool {
3033    match (a, b) {
3034        (upon::Value::Integer(a), upon::Value::Integer(b)) => *a < *b,
3035        (upon::Value::Float(a), upon::Value::Float(b)) => *a < *b,
3036        (upon::Value::String(a), upon::Value::String(b)) => *a < *b,
3037        _ => false,
3038    }
3039}
3040
3041fn filter_gte(a: &upon::Value, b: &upon::Value) -> bool {
3042    match (a, b) {
3043        (upon::Value::Integer(a), upon::Value::Integer(b)) => *a >= *b,
3044        (upon::Value::Float(a), upon::Value::Float(b)) => *a >= *b,
3045        (upon::Value::String(a), upon::Value::String(b)) => *a >= *b,
3046        _ => false,
3047    }
3048}
3049
3050fn filter_lte(a: &upon::Value, b: &upon::Value) -> bool {
3051    match (a, b) {
3052        (upon::Value::Integer(a), upon::Value::Integer(b)) => *a <= *b,
3053        (upon::Value::Float(a), upon::Value::Float(b)) => *a <= *b,
3054        (upon::Value::String(a), upon::Value::String(b)) => *a <= *b,
3055        _ => false,
3056    }
3057}
3058
3059fn filter_add(a: &upon::Value, b: &upon::Value) -> upon::Value {
3060    match (a, b) {
3061        (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a + b),
3062        (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a + b),
3063        (upon::Value::String(a), upon::Value::String(b)) => upon::Value::String(a.clone() + b),
3064        _ => upon::Value::None,
3065    }
3066}
3067
3068fn filter_sub(a: &upon::Value, b: &upon::Value) -> upon::Value {
3069    match (a, b) {
3070        (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a - b),
3071        (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a - b),
3072        _ => upon::Value::None,
3073    }
3074}
3075
3076fn filter_mul(a: &upon::Value, b: &upon::Value) -> upon::Value {
3077    match (a, b) {
3078        (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a * b),
3079        (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a * b),
3080        _ => upon::Value::None,
3081    }
3082}
3083
3084fn filter_div(a: &upon::Value, b: &upon::Value) -> upon::Value {
3085    match (a, b) {
3086        (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a / b),
3087        (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a / b),
3088        _ => upon::Value::None,
3089    }
3090}
3091
3092
3093/// Map value between toml and upon. This makes a clone.
3094fn map_value(value: &toml::Value) -> upon::Value {
3095    match value {
3096        toml::Value::String(s) => upon::Value::String(s.clone()),
3097        toml::Value::Integer(i) => upon::Value::Integer(*i),
3098        toml::Value::Float(i) => upon::Value::Float(*i),
3099        toml::Value::Boolean(v) => upon::Value::Bool(*v),
3100        toml::Value::Datetime(s) => upon::Value::String(s.to_string()),
3101        toml::Value::Array(v) => upon::Value::List(v.iter().map(|i| map_value(i)).collect()),
3102        toml::Value::Table(v) => upon::Value::Map(v.iter().map(|(k,i)| (k.clone(),map_value(i))).collect()),
3103    }
3104}
3105
3106/// Parse a string that is a result from the template renderer to a DataValue again
3107#[inline]
3108fn string_to_datavalue(value: String, valuetype: Option<&str>) -> Result<DataValue,XmlConversionError> {
3109    match valuetype {
3110        Some("str") | Some("string")  => Ok(DataValue::String(value)),
3111        Some("int") => {
3112            if let Ok(value) = value.parse::<isize>() {
3113                Ok(DataValue::Int(value))
3114            } else {
3115                Err(XmlConversionError::TemplateError(format!("Unable to interpret value as integer: {}", value), None))
3116            }
3117        },
3118        Some("float") => {
3119            if let Ok(value) = value.parse::<f64>() {
3120                Ok(DataValue::Float(value))
3121            } else {
3122                Err(XmlConversionError::TemplateError(format!("Unable to interpret value as integer: {}", value), None))
3123            }
3124        },
3125        Some("bool") => match value.as_str() {
3126            "yes" | "true" | "enabled" | "on" | "1" | "active"  => Ok(DataValue::Bool(true)),
3127            _ => Ok(DataValue::Bool(false))
3128        },
3129        Some(x) => {
3130                Err(XmlConversionError::TemplateError(format!("Invalid valuetype: {}", x), None))
3131        }
3132        None => {
3133            //automatically determine type
3134            if let Ok(value) =  value.parse::<isize>() {
3135                Ok(DataValue::Int(value))
3136            } else if let Ok(value) =  value.parse::<f64>() {
3137                Ok(DataValue::Float(value))
3138            } else if value.starts_with("(list) [ ") && value.ends_with(" ]") {
3139                //deserialize lists again
3140                if let Ok(serde_json::Value::Array(values)) = serde_json::from_str(&value[6..]) {
3141                    Ok(DataValue::List(values.into_iter().map(|v| {
3142                        match v {
3143                            serde_json::Value::String(s) => DataValue::String(s),
3144                            serde_json::Value::Number(n) => if let Some(n) = n.as_i64() {
3145                                DataValue::Int(n as isize)
3146                            } else if let Some(n) = n.as_f64() {
3147                                DataValue::Float(n)
3148                            } else {
3149                                unreachable!("number should always be either int or float")
3150                            },
3151                            serde_json::Value::Bool(b) => DataValue::Bool(b),
3152                            _ => DataValue::Null, //nested arrays and maps are NOT supported here!
3153                        }
3154                    }).collect()))
3155                } else {
3156                    Err(XmlConversionError::TemplateError(format!("Unable to deserialize list value: {}", value), None))
3157                }
3158            } else {
3159                Ok(value.into())
3160            }
3161        }
3162    }
3163}
3164
3165fn string_to_templatevalue(value: String) -> upon::Value {
3166    if let Ok(value) =  value.parse::<i64>() {
3167        upon::Value::Integer(value)
3168    } else if let Ok(value) =  value.parse::<f64>() {
3169        upon::Value::Float(value)
3170    } else {
3171        upon::Value::String(value)
3172    }
3173}
3174
3175/// Custom formatter for templating that can also handle lists (the default one in upon can't)
3176/// Lists will be output JSON-style prepended by the marker text "(list) ", this allows deserialisers to turn it into a list again
3177fn value_formatter(f: &mut upon::fmt::Formatter<'_>, value: &upon::Value) -> upon::fmt::Result {
3178    match value {
3179        upon::Value::List(vs) => {
3180            f.write_str("(list) [ ")?;
3181            for (i, v) in vs.iter().enumerate() {
3182                if i > 0 {
3183                    f.write_str(", ")?;
3184                }
3185                if let upon::Value::String(s) = v {
3186                    write!(f, "\"{}\"", s.replace("\"","\\\"").replace("\n"," ").split_whitespace().collect::<Vec<_>>().join(" "))?;
3187                } else {
3188                    upon::fmt::default(f, v)?;
3189                    f.write_char('"')?;
3190                }
3191            }
3192            f.write_str(" ]")?;
3193        }
3194        v => upon::fmt::default(f, v)?, // fallback to default formatter
3195    };
3196    Ok(())
3197}
3198
/// Describes a template filter that is implemented by an external command; the value it is
/// applied to is handed to the command and the command's output becomes the filter's result.
#[derive(Clone,Debug,Deserialize)]
struct ExternalFilter {
    /// The name of the filter
    name: String,

    /// The command to run.
    command: String,

    /// The arguments to pass to the command. An argument that is exactly `{{value}}`, `{{ value }}` or `$value` is replaced by the input value;
    /// the input value is also always passed to the command's stdin. No escaping needed, it is not mediated by a shell.
    args: Vec<String>
}
3210
3211impl ExternalFilter {
3212    //TODO: panic may be too strict in here:
3213    fn run(&self, input_value: &upon::Value) -> upon::Value {
3214        let process = Command::new(self.command.as_str()).args(
3215            //args are passed directly, not mediated via shell, so no escaping necessary
3216            self.args.iter().map(|x| if x == "{{value}}" || x == "{{ value }}" || x == "$value" {
3217                match input_value {
3218                    upon::Value::String(s) => s.clone(),
3219                    upon::Value::Integer(d) => format!("{}",d),
3220                    upon::Value::Float(d) => format!("{}",d),
3221                    upon::Value::Bool(d) => format!("{}",d),
3222                    upon::Value::None => String::new(),
3223                    _ => panic!("Lists and maps are not supported to be passed as parameter to  external filters yet!"), 
3224                }
3225            } else {
3226                x.clone() //too much cloning, but Cow didn't work here because it is coerced into OsStr later
3227            })
3228        ).stdin(Stdio::piped()).stdout(Stdio::piped()).spawn();
3229
3230
3231        if let Ok(mut process) = process {
3232            {
3233                let mut outstdin = process.stdin.take().expect("unable to open stdin for external filter");
3234                let mut writer = BufWriter::new(&mut outstdin);
3235                match input_value {
3236                    upon::Value::String(s) => writer.write(s.as_bytes()),
3237                    upon::Value::Integer(d) => writer.write(format!("{}",d).as_bytes()),
3238                    upon::Value::Float(d) => writer.write(format!("{}",d).as_bytes()),
3239                    upon::Value::Bool(d) => writer.write(format!("{}",d).as_bytes()),
3240                    upon::Value::None => writer.write(&[]),
3241                    _ => panic!("Lists and maps are not supported to be passed as input to external filters yet!"),
3242                }.expect("Writing to stdin for external filter failed!");
3243                //block ensures writer and outputsdin are dropped prior to waiting for output
3244            }
3245            let output = process.wait_with_output().expect("External filter wasn't running");
3246            if !output.status.success() {
3247                panic!("External filter {} failed ({:?})", self.name, output.status.code());
3248            }
3249            if let Ok(s) = String::from_utf8(output.stdout) {
3250                return string_to_templatevalue(s);
3251            } else {
3252                panic!("External filter {} produced invalid UTF-8!", self.name);
3253            }
3254        }
3255        panic!("External filter {} failed!", self.name);
3256    }
3257}
3258
3259#[cfg(test)]
3260mod tests {
3261    use super::*;
3262    //use crate::info::info;
3263
3264    const XMLSMALLEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3265<head><title>test</title></head><body><h1>TEST</h1><p xml:id="p1" n="001">This  is a <em xml:id="emphasis" style="color:green">test</em>.</p></body></html>"#;
3266
3267    const XMLEXAMPLE: &'static str = r#"<!DOCTYPE entities[<!ENTITY nbsp "&#xA0;">]>
3268<html xmlns="http://www.w3.org/1999/xhtml" xmlns:my="http://example.com">
3269<head>
3270    <title>Test</title>
3271    <meta name="author" content="proycon" />
3272</head>
3273<body>
3274    <h1>Header</h1>
3275
3276    <p xml:id="par1">
3277        <span xml:id="sen1">This is a sentence.</span>
3278        <span xml:id="sen2">This is the second&nbsp;sentence.</span>
3279    </p>
3280    <p xml:id="par2">
3281        <strong>This</strong> is    the <em>second</em> paragraph.
3282            It has a <strong>bold</strong> word and one in <em>italics</em>.<br/>
3283        Let's highlight stress in the following word: <span my:stress="secondary">re</span>pu<span my:stress="primary">ta</span>tion.
3284    </p>
3285    <p xml:space="preserve"><![CDATA[This    third
3286paragraph consists
3287of CDATA and is configured to preserve whitespace, and weird &entities; ]]></p>
3288
3289    <h2>Subsection</h2>
3290
3291    <p>
3292    Have some fruits:<br/>
3293    <ul xml:id="list1" class="fruits">
3294        <li xml:id="fruit1">apple</li>
3295        <li xml:id="fruit2">banana</li>
3296        <li xml:id="fruit3">melon</li>
3297    </ul>
3298    </p>
3299
3300    Some lingering text outside of any confines...
3301</body>
3302</html>"#;
3303
3304    const XMLEXAMPLE_TEXTOUTPUT: &'static str = "Header\n\nThis is a sentence. This is the second sentence.\n\nThis is the second paragraph. It has a bold word and one in italics.\nLet's highlight stress in the following word: reputation.\n\nThis    third\nparagraph consists\nof CDATA and is configured to preserve whitespace, and weird &entities; \nSubsection\n\nHave some fruits:\n* apple\n* banana\n* melon\n\nSome lingering text outside of any confines...";
3305
3306    //fake example (not real HTML, testing TEI-like space attribute with complex template)
3307    const XMLTEISPACE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3308<body><space dim="vertical" unit="lines" quantity="3" /></body></html>"#;
3309
3310    const CONF: &'static str = r#"#default whitespace handling (Collapse or Preserve)
3311whitespace = "Collapse"
3312default_set = "urn:stam-fromhtml"
3313
3314[namespaces]
3315#this defines the namespace prefixes you can use in this configuration
3316xml = "http://www.w3.org/XML/1998/namespace"
3317html = "http://www.w3.org/1999/xhtml"
3318xsd =  "http://www.w3.org/2001/XMLSchema"
3319xlink = "http://www.w3.org/1999/xlink"
3320
3321# elements and attributes are matched in reverse-order, so put more generic statements before more specific ones
3322
3323#Define some base elements that we reuse later for actual elements (prevents unnecessary repetition)
3324[baseelements.common]
3325id = "{% if ?.@xml:id %}{{ @xml:id }}{% endif %}"
3326
3327    [[baseelements.common.annotationdata]]
3328    key = "type"
3329    value = "{{ localname }}"
3330
3331    [[baseelements.common.annotationdata]]
3332    key = "lang"
3333    value = "{{ @xml:lang }}"
3334    skip_if_missing = true
3335
3336    [[baseelements.common.annotationdata]]
3337    key = "n"
3338    value = "{{ @n }}"
3339    skip_if_missing = true
3340    valuetype = "int"
3341
3342    [[baseelements.common.annotationdata]]
3343    key = "nstring"
3344    value = "{{ @n }}"
3345    skip_if_missing = true
3346    valuetype = "string"
3347
3348    [[baseelements.common.annotationdata]]
3349    key = "style"
3350    value = "{{ @style }}"
3351    skip_if_missing = true
3352
3353    [[baseelements.common.annotationdata]]
3354    key = "class"
3355    value = "{{ @class }}"
3356    skip_if_missing = true
3357
3358    [[baseelements.common.annotationdata]]
3359    key = "src"
3360    value = "{{ @src }}"
3361    skip_if_missing = true
3362
3363[baseelements.text]
3364text = true
3365
3366
3367[[elements]]
3368base = [ "text", "common" ]
3369path = "*"
3370text = true
3371annotation = "TextSelector"
3372
3373# Pass through the following elements without mapping to text
3374[[elements]]
3375base = [ "common" ]
3376path = "//html:head"
3377
3378[[elements]]
3379base = [ "common" ]
3380path = "//html:head//*"
3381
3382# Map metadata like <meta name="key" content="value"> to annotations with key->value data selecting the resource (ResourceSelector)
3383[[elements]]
3384base = [ "common" ]
3385path = "//html:head//html:meta"
3386
3387[[elements.annotationdata]]
3388key = "{% if ?.@name %}{{ name }}{% endif %}"
3389value = "{% if ?.@content %}{{ @content }}{% endif %}"
3390skip_if_missing = true
3391
3392# By default, ignore any tags in the head (unless they're mentioned specifically later in the config)
3393[[elements]]
3394path = "//html:head/html:title"
3395annotation = "ResourceSelector"
3396
3397[[elements.annotationdata]]
3398key = "title"
3399value = "{{ $. | trim }}"
3400
3401
3402# Determine how various structural elements are converted to text
3403
3404[[elements]]
3405base = [ "common" ]
3406path = "//html:br"
3407textsuffix = "\n"
3408
3409[[elements]]
3410base = [ "common", "text" ]
3411path = "//html:p"
3412textprefix = "\n"
3413textsuffix = "\n"
3414annotation = "TextSelector"
3415
3416# Let's do headers and bulleted lists like markdown
3417[[elements]]
3418base = [ "common", "text" ]
3419path = "//html:h1"
3420textsuffix = "\n"
3421
3422[[elements]]
3423base = [ "common", "text" ]
3424path = "//html:h2"
3425textsuffix = "\n"
3426
3427#Generic, will be overriden by more specific one
3428[[elements]]
3429base = [ "common", "text" ]
3430path = "//html:li"
3431textprefix = "- "
3432textsuffix = "\n"
3433
3434[[elements]]
3435base = [ "common", "text" ]
3436path = """//html:body"""
3437annotation = "TextSelector"
3438id = "body"
3439
3440    [[elements.annotationdata]]
3441    key = "title_from_parent"
3442    value = "{{ $../html:head/html:title }}"
3443    skip_if_missing = true
3444
3445    [[elements.annotationdata]]
3446    key = "title_from_root"
3447    value = "{{ $/html:html/html:head/html:title }}"
3448    skip_if_missing = true
3449
3450    [[elements.annotationdata]]
3451    key = "firstfruit"
3452    value = """{{ $./html:p/html:ul/html:li }}"""
3453    skip_if_missing = true
3454
3455    [[elements.annotationdata]]
3456    key = "fruits"
3457    value = """{{ $$./html:p/html:ul/html:li }}"""
3458    skip_if_missing = true
3459
3460    [[elements.annotationdata]]
3461    key = "multifruits"
3462    value = """{{ $$./html:p/html:ul/html:li }}"""
3463    skip_if_missing = true
3464    multiple = true
3465
3466#More specific one takes precendence over the above generic one
3467[[elements]]
3468base = [ "common", "text" ]
3469path = """//html:ul[@class="fruits"]/html:li"""
3470textprefix = "* "
3471textsuffix = "\n"
3472
3473#Not real HTML, test-case modelled after TEI space
3474[[elements]]
3475base = [ "common" ]
3476path = """//html:space[@dim="vertical" and @unit="lines"]"""
3477text = true
3478textsuffix = """\n{% for x in @quantity | int | as_range %}\n{% endfor %}"""
3479
3480
3481[[elements]]
3482base = [ "common", "text" ]
3483path = "//html:example"
3484annotation = "TextSelector"
3485
3486[[elements.annotationdata]]
3487key = "requiredattrib"
3488value = "{{ @requiredattrib }}"
3489
3490[[elements.annotationdata]]
3491key = "optattrib"
3492value = "{{ ?.@optattrib }}"
3493
3494[[elements]]
3495base = [ "common","text" ]
3496path = "//html:marquee"
3497annotation = "TextSelector"
3498
3499#map value, some bogus data to test parsing
3500[[elements.annotationdata]]
3501key = "map"
3502
3503[elements.annotationdata.value]
3504text = "{{ $. }}"
3505number = 42
3506bogus = true
3507
3508[[metadata]]
3509id = "metadata"
3510
3511[[metadata.annotationdata]]
3512key = "author"
3513value = "proycon"
3514"#;
3515
3516    const XMLREQATTRIBEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3517<body><example xml:id="ann1" requiredattrib="blah">test</example></body></html>"#;
3518
3519    const XMLREQATTRIBEXAMPLE2: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3520<body><example xml:id="ann1">test</example></body></html>"#;
3521
3522    const XMLREQATTRIBEXAMPLE3: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3523<body><example xml:id="ann1" requiredattrib="blah" optattrib="blah">test</example></body></html>"#;
3524
3525    const XMLMAPEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3526<body><marquee xml:id="ann1">test</marquee></body></html>"#;
3527
3528    #[test]
3529    fn test_precompile_template_nochange() -> Result<(), String> {
3530        let config = XmlConversionConfig::new();
3531        let mut conv = XmlToStamConverter::new(&config);
3532        let template_in = "{{ foo }}";
3533        let template_out = conv.precompile(template_in);
3534        assert_eq!( template_out, template_in);
3535        //foo is not a special variable
3536        assert!(!conv.variables.get(template_in).as_ref().unwrap().contains("foo"));
3537        Ok(())
3538    }
3539
/// An attribute reference (`@foo`) must be rewritten to `ATTRIB_foo` and recorded as `@foo`.
#[test]
fn test_precompile_template_attrib() -> Result<(), String> {
    let configuration = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&configuration);
    let source = "{{ @foo }}";
    let compiled = converter.precompile(source);
    assert_eq!(compiled, "{{ ATTRIB_foo }}");
    // foo is an attribute, so it is expected among the recorded variables
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("@foo"));
    Ok(())
}
3551
/// A namespaced attribute reference (`@bar:foo`) must be rewritten to `ATTRIB_bar__foo`
/// and recorded as `@bar:foo`.
#[test]
fn test_precompile_template_attrib_ns() -> Result<(), String> {
    let configuration = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&configuration);
    let source = "{{ @bar:foo }}";
    let compiled = converter.precompile(source);
    assert_eq!(compiled, "{{ ATTRIB_bar__foo }}");
    // foo is an attribute, so it is expected among the recorded variables
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("@bar:foo"));
    Ok(())
}
3563
/// An element reference (`$foo`) must be rewritten to `ELEMENT_foo` and recorded as `$foo`.
#[test]
fn test_precompile_template_element() -> Result<(), String> {
    let configuration = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&configuration);
    let source = "{{ $foo }}";
    let compiled = converter.precompile(source);
    assert_eq!(compiled, "{{ ELEMENT_foo }}");
    // foo is an element, so it is expected among the recorded variables
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$foo"));
    Ok(())
}
3575
/// A namespaced element reference (`$bar:foo`) must be rewritten to `ELEMENT_bar__foo`
/// and recorded as `$bar:foo`.
#[test]
fn test_precompile_template_element_ns() -> Result<(), String> {
    let configuration = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&configuration);
    let source = "{{ $bar:foo }}";
    let compiled = converter.precompile(source);
    assert_eq!(compiled, "{{ ELEMENT_bar__foo }}");
    // foo is an element, so it is expected among the recorded variables
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$bar:foo"));
    Ok(())
}
3587
/// The `$.` reference must be rewritten to `ELEMENT_THIS` and recorded as `$.`.
#[test]
fn test_precompile_template_this_text() -> Result<(), String> {
    let configuration = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&configuration);
    let source = "{{ $. }}";
    let compiled = converter.precompile(source);
    assert_eq!(compiled, "{{ ELEMENT_THIS }}");
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$."));
    Ok(())
}
3598
/// The `$..` reference must be rewritten to `ELEMENT_PARENT` and recorded as `$..`.
#[test]
fn test_precompile_template_parent_text() -> Result<(), String> {
    let configuration = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&configuration);
    let source = "{{ $.. }}";
    let compiled = converter.precompile(source);
    assert_eq!(compiled, "{{ ELEMENT_PARENT }}");
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$.."));
    Ok(())
}
3609
/// A `$$foo` reference must be rewritten to `ELEMENTS_foo` and recorded as `$$foo`.
#[test]
fn test_precompile_template_elements() -> Result<(), String> {
    let configuration = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&configuration);
    let source = "{{ $$foo }}";
    let compiled = converter.precompile(source);
    assert_eq!(compiled, "{{ ELEMENTS_foo }}");
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$$foo"));
    Ok(())
}
3620
/// A namespaced `$$bar:foo` reference must be rewritten to `ELEMENTS_bar__foo`
/// and recorded as `$$bar:foo`.
#[test]
fn test_precompile_template_elements_ns() -> Result<(), String> {
    let configuration = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&configuration);
    let source = "{{ $$bar:foo }}";
    let compiled = converter.precompile(source);
    assert_eq!(compiled, "{{ ELEMENTS_bar__foo }}");
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$$bar:foo"));
    Ok(())
}
3631
3632
/// Attribute references are also rewritten inside block tags: `{% for x in @foo %}` must
/// become `{% for x in ATTRIB_foo %}`, with `@foo` recorded as a variable.
#[test]
fn test_precompile_template_attrib2() -> Result<(), String> {
    let configuration = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&configuration);
    let source = "{% for x in @foo %}";
    let compiled = converter.precompile(source);
    assert_eq!(compiled, "{% for x in ATTRIB_foo %}");
    // foo is an attribute, so it is expected among the recorded variables
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("@foo"));
    Ok(())
}
3644
/// The `?.` prefix is kept in front of a rewritten attribute: `{{ ?.@foo }}` must become
/// `{{ ?.ATTRIB_foo }}`, with `@foo` recorded as a variable.
#[test]
fn test_precompile_template_attrib3() -> Result<(), String> {
    let configuration = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&configuration);
    let source = "{{ ?.@foo }}";
    let compiled = converter.precompile(source);
    assert_eq!(compiled, "{{ ?.ATTRIB_foo }}");
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("@foo"));
    Ok(())
}
3655
/// A path reference (`$x/y/z/@a`) must be rewritten to `ELEMENT_x_IN_y_IN_z_IN_ATTRIB_a`
/// and recorded under its original spelling.
#[test]
fn test_precompile_template_path() -> Result<(), String> {
    let configuration = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&configuration);
    let source = "{{ $x/y/z/@a }}";
    let compiled = converter.precompile(source);
    assert_eq!(compiled, "{{ ELEMENT_x_IN_y_IN_z_IN_ATTRIB_a }}");
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$x/y/z/@a"));
    Ok(())
}
3666
/// Parses `CONF`, compiles it, and checks the number of namespaces, elements, base elements
/// and annotationdata entries that ended up in the configuration.
#[test]
fn test_loadconfig() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?;
    let mut converter = XmlToStamConverter::new(&config);
    converter.compile().map_err(|err| err.to_string())?;

    let loaded = &converter.config;
    assert_eq!(loaded.namespaces.len(), 4, "number of namespaces");
    assert_eq!(loaded.elements.len(), 15, "number of elements");
    assert_eq!(loaded.baseelements.len(), 2, "number of baseelements");

    let first_element = loaded.elements.first().unwrap();
    assert_eq!(
        first_element.annotationdata.len(),
        7,
        "number of annotationdata under first element"
    );
    let common = loaded.baseelements.get("common").unwrap();
    assert_eq!(
        common.annotationdata.len(),
        7,
        "number of annotationdata under baseelement common"
    );
    Ok(())
}
3679
/// Converts `XMLSMALLEXAMPLE` and checks the resulting resource text, the annotation count,
/// the `emphasis` annotation (text and `style`), the `title` metadata annotation and the
/// `title_from_parent` / `title_from_root` data on the `body` annotation.
#[test]
fn test_small() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;

    // resource text and overall annotation count
    let resource = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(resource.text(), "TEST\n\nThis is a test.\n", "resource text");
    assert_eq!(store.annotations_len(), 6, "number of annotations");

    // the emphasis annotation: its text and its style data
    let emphasis = store
        .annotation("emphasis")
        .expect("annotation must have been created at this point");
    assert_eq!(emphasis.text_simple(), Some("test"));
    let style_key = store
        .key("urn:stam-fromhtml", "style")
        .expect("key must exist");
    assert_eq!(
        emphasis.data().filter_key(&style_key).value_as_str(),
        Some("color:green")
    );

    // the title is looked up through the resource's metadata annotations
    let title_key = store
        .key("urn:stam-fromhtml", "title")
        .expect("key must exist");
    let title_annotation = resource
        .annotations_as_metadata()
        .filter_key(&title_key)
        .next()
        .expect("annotation");
    assert_eq!(
        title_annotation.data().filter_key(&title_key).value_as_str(),
        Some("test")
    );

    // the body annotation carries the title under two further keys
    let body = store
        .annotation("body")
        .expect("body annotation not found");
    let parent_title_key = store
        .key("urn:stam-fromhtml", "title_from_parent")
        .expect("key must exist");
    let root_title_key = store
        .key("urn:stam-fromhtml", "title_from_root")
        .expect("key must exist");
    assert_eq!(
        body.data().filter_key(&parent_title_key).value_as_str(),
        Some("test")
    );
    assert_eq!(
        body.data().filter_key(&root_title_key).value_as_str(),
        Some("test")
    );
    Ok(())
}
3703
/// Converts `XMLEXAMPLE` and checks that the resource text equals `XMLEXAMPLE_TEXTOUTPUT`.
#[test]
fn test_full() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
    let resource = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(resource.text(), XMLEXAMPLE_TEXTOUTPUT, "resource text");
    Ok(())
}
3713
/// Converts `XMLEXAMPLE` and checks that the `body` annotation has `firstfruit` set to "apple".
#[test]
fn test_firstfruit() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
    let body = store
        .annotation("body")
        .expect("body annotation not found");
    let fruit_key = store
        .key("urn:stam-fromhtml", "firstfruit")
        .expect("key must exist");
    let first_fruit = body.data().filter_key(&fruit_key).value_as_str();
    assert_eq!(first_fruit, Some("apple"));
    Ok(())
}
3724
/// Converts `XMLEXAMPLE` and checks that the `body` annotation has `fruits` set to a list
/// holding "apple", "banana" and "melon", in that order.
#[test]
fn test_fruits() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
    let body = store
        .annotation("body")
        .expect("body annotation not found");
    let fruits_key = store
        .key("urn:stam-fromhtml", "fruits")
        .expect("key must exist");
    let expected = DataValue::List(vec!["apple".into(), "banana".into(), "melon".into()]);
    assert_eq!(body.data().filter_key(&fruits_key).value(), Some(&expected));
    Ok(())
}
3735
/// Converts `XMLEXAMPLE` and checks that the `body` annotation has three separate
/// `multifruits` data items: "apple", "banana" and "melon", in that order.
#[test]
fn test_multifruits() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
    let body = store
        .annotation("body")
        .expect("body annotation not found");
    let fruits_key = store
        .key("urn:stam-fromhtml", "multifruits")
        .expect("key must exist");
    let results: Vec<_> = body.data().filter_key(&fruits_key).collect();
    assert_eq!(results.len(), 3);
    // one data item per fruit, checked in order
    for (index, fruit) in ["apple", "banana", "melon"].iter().enumerate() {
        assert_eq!(
            results.get(index).unwrap().value(),
            &DataValue::String(fruit.to_string())
        );
    }
    Ok(())
}
3750
/// Converts `XMLTEISPACE` and checks that the resource text consists of exactly four newlines.
#[test]
fn test_teispace() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?;
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLTEISPACE, &config, &mut store)?;
    let resource = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(resource.text(), "\n\n\n\n", "resource text");
    Ok(())
}
3760
3761
/// Converts `XMLREQATTRIBEXAMPLE` (required attribute present, optional one absent) and checks
/// that `requiredattrib` is stored on `ann1` while no `optattrib` key exists in the store.
#[test]
fn test_reqattrib() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?;
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLREQATTRIBEXAMPLE, &config, &mut store)?;
    let resource = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(resource.text(), "test", "resource text");

    let required_key = store
        .key("urn:stam-fromhtml", "requiredattrib")
        .expect("key must exist");
    let ann1 = store.annotation("ann1").expect("annotation");
    assert_eq!(
        ann1.data().filter_key(&required_key).value_as_str(),
        Some("blah")
    );

    let optional_key = store.key("urn:stam-fromhtml", "optattrib");
    assert!(optional_key.is_none(), "optional attrib is unused");
    Ok(())
}
3775
/// Converts `XMLREQATTRIBEXAMPLE2` (required attribute missing) and checks that the
/// conversion returns an error.
#[test]
fn test_reqattrib2() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    let outcome = from_xml_in_memory("test", XMLREQATTRIBEXAMPLE2, &config, &mut store);
    assert!(outcome.is_err(), "checking if error is returned");
    Ok(())
}
3784
/// Converts `XMLREQATTRIBEXAMPLE3` (required and optional attribute both present) and checks
/// that both `requiredattrib` and `optattrib` are stored on `ann1` with value "blah".
#[test]
fn test_reqattrib3() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?;
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLREQATTRIBEXAMPLE3, &config, &mut store)?;
    let resource = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(resource.text(), "test", "resource text");

    let required_key = store
        .key("urn:stam-fromhtml", "requiredattrib")
        .expect("key must exist");
    let optional_key = store
        .key("urn:stam-fromhtml", "optattrib")
        .expect("key optattrib must exist");
    let ann1 = store.annotation("ann1").expect("annotation");
    assert_eq!(
        ann1.data().filter_key(&required_key).value_as_str(),
        Some("blah")
    );
    assert_eq!(
        ann1.data().filter_key(&optional_key).value_as_str(),
        Some("blah")
    );
    Ok(())
}
3799
/// Converts `XMLMAPEXAMPLE` and checks that the `map` key on annotation `ann1` holds a
/// `DataValue::Map` with exactly three entries: `text` ("test"), `number` (42) and
/// `bogus` (true).
///
/// # Panics
///
/// Panics (failing the test) when the stored value is not a `DataValue::Map`.
#[test]
fn test_map() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?;
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLMAPEXAMPLE, &config, &mut store)?;
    let res = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(res.text(), "test", "resource text");
    let key = store
        .key("urn:stam-fromhtml", "map")
        .expect("key must exist");
    let annotation = store.annotation("ann1").expect("annotation");
    let data = annotation
        .data()
        .filter_key(&key)
        .value()
        .expect("data must exist");
    match data {
        DataValue::Map(map) => {
            assert_eq!(map.get("text"), Some(&DataValue::String("test".into())));
            assert_eq!(map.get("number"), Some(&DataValue::Int(42)));
            assert_eq!(map.get("bogus"), Some(&DataValue::Bool(true)));
            assert_eq!(map.len(), 3);
        }
        // any other variant is a failure; panic explicitly instead of asserting on `false`
        _ => panic!("Data is supposed to be a map"),
    }
    Ok(())
}
3820
/// Converts `XMLEXAMPLE` and checks that the `metadata` annotation carries the key `author`
/// with the string value "proycon".
#[test]
fn test_metadata() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
    let metadata = store.annotation("metadata").expect("annotation");
    let author_key = store
        .key("urn:stam-fromhtml", "author")
        .expect("key must exist");
    let author = metadata
        .data()
        .filter_key(&author_key)
        .value()
        .expect("data must exist");
    assert_eq!(author, &DataValue::String("proycon".into()));
    Ok(())
}
3832
/// Converts `XMLSMALLEXAMPLE` and checks that key `n` on annotation `p1` is the integer 1.
#[test]
fn test_datavalue_int() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;
    let paragraph = store.annotation("p1").expect("annotation not found");
    let number_key = store
        .key("urn:stam-fromhtml", "n")
        .expect("key must exist");
    let expected = DataValue::Int(1);
    assert_eq!(
        paragraph.data().filter_key(&number_key).value(),
        Some(&expected)
    );
    Ok(())
}
3843
/// Converts `XMLSMALLEXAMPLE` and checks that key `nstring` on annotation `p1` is the
/// string "001".
#[test]
fn test_datavalue_string() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;
    let paragraph = store.annotation("p1").expect("annotation not found");
    let string_key = store
        .key("urn:stam-fromhtml", "nstring")
        .expect("key must exist");
    let expected = DataValue::String("001".to_string());
    assert_eq!(
        paragraph.data().filter_key(&string_key).value(),
        Some(&expected)
    );
    Ok(())
}
3854
3855}