Skip to main content

stamtools/
xml.rs

1use std::borrow::Cow;
2use std::collections::{BTreeMap, HashMap, BTreeSet};
3use std::fmt::Display;
4use std::fs::read_to_string;
5use std::path::Path;
6use std::hash::{Hash,DefaultHasher,Hasher};
7use std::process::{Command,  Stdio};
8use std::io::{ BufWriter, Write};
9
10use roxmltree::{Document, Node, NodeId, ParsingOptions};
11use serde::Deserialize;
12use stam::*;
13use toml;
14use upon::Engine;
15use std::fmt::Write as FmtWrite;
16use serde_json;
17
/// Namespace URI that the reserved `xml:` prefix is bound to (as in `xml:id`, `xml:lang`, `xml:space`).
const NS_XML: &str = "http://www.w3.org/XML/1998/namespace";
/// URL of the W3C Web Annotation JSON-LD context document.
const CONTEXT_ANNO: &str = "http://www.w3.org/ns/anno.jsonld";
20
21
/// Serde default provider for the `default_set` field of `XmlConversionConfig`.
///
/// Returns the identifier used when the configuration does not name a default set.
fn default_set() -> String {
    String::from("urn:stam-fromxml")
}
25
#[derive(Deserialize)]
/// Holds the configuration for mapping a specific XML format to STAM.
///
/// Can be deserialized from TOML (see `from_toml_str`) or assembled programmatically
/// with `new()` and the `with_*` builder methods.
pub struct XmlConversionConfig {
    #[serde(default)]
    /// Configurations for mapping specific XML elements to STAM. They are evaluated in
    /// reverse order and the first match wins, so put more generic rules before specific ones.
    elements: Vec<XmlElementConfig>,

    #[serde(default)]
    /// Base elements are named templates that entries in `elements` can derive from
    /// (via their `base` list); they are merged in by `resolve_baseelements`.
    baseelements: HashMap<String, XmlElementConfig>,

    #[serde(default)]
    /// Maps XML prefixes (as used in the path expressions) to namespace URIs
    namespaces: HashMap<String, String>,

    #[serde(default = "XmlWhitespaceHandling::collapse")]
    /// Default whitespace handling; `Collapse` when not configured
    whitespace: XmlWhitespaceHandling,

    #[serde(default)]
    /// Sets additional context variables that can be used in templates
    context: HashMap<String, toml::Value>,

    #[serde(default)]
    /// Metadata annotation rules. Unlike `elements`, these carry no path expression.
    metadata: Vec<MetadataConfig>,

    #[serde(default)]
    /// Inject a DTD (for XML entity resolution). It is prepended to the document, which is
    /// only done when the document does not already start with a DOCTYPE of its own.
    inject_dtd: Option<String>,

    #[serde(default = "default_set")]
    /// Identifier of the default set; `urn:stam-fromxml` when not configured.
    // NOTE(review): presumably the annotation data set used when a data rule names no set — confirm against the converter.
    default_set: String,

    #[serde(default)]
    /// A prefix to assign when setting annotation IDs. Within this string the special
    /// variable `{resource}` can be used to refer to the resource ID.
    id_prefix: Option<String>,

    #[serde(default)]
    /// Suffixes to strip when setting annotation IDs
    id_strip_suffix: Vec<String>,

    #[serde(default)]
    /// Add provenance information pointing each annotation to the appropriate node in the XML source files where it came from (translates into XPathSelector in Web Annotation output)
    provenance: bool,

    #[serde(default)]
    /// External filter definitions (`ExternalFilter` is declared elsewhere in this file).
    // NOTE(review): presumably external commands made available to templates — confirm.
    external_filters: Vec<ExternalFilter>,

    #[serde(skip_deserializing)]
    /// Print debug information to standard error. Never read from the configuration
    /// file; set it with `with_debug`.
    debug: bool,

}
79
80impl XmlConversionConfig {
81    pub fn new() -> Self {
82        Self {
83            elements: Vec::new(),
84            baseelements: HashMap::new(),
85            namespaces: HashMap::new(),
86            context: HashMap::new(),
87            metadata: Vec::new(),
88            whitespace: XmlWhitespaceHandling::Collapse,
89            default_set: default_set(),
90            inject_dtd: None,
91            id_prefix: None,
92            id_strip_suffix: Vec::new(),
93            provenance: false,
94            external_filters: Vec::new(),
95            debug: false,
96        }
97    }
98
99    pub fn resolve_baseelements(&mut self) -> Result<(), XmlConversionError> {
100        let mut replace: Vec<(usize, XmlElementConfig)> = Vec::new();
101        for (i, element) in self.elements.iter().enumerate() {
102            let mut newelement = None;
103            for basename in element.base.iter().rev() {
104                if let Some(baseelement) = self.baseelements.get(basename) {
105                    if newelement.is_none() {
106                        newelement = Some(element.clone());
107                    }
108                    newelement
109                        .as_mut()
110                        .map(|newelement| newelement.update(baseelement));
111                } else {
112                    return Err(XmlConversionError::ConfigError(format!(
113                        "No such base element: {}",
114                        basename
115                    )));
116                }
117            }
118            if let Some(newelement) = newelement {
119                replace.push((i, newelement));
120            }
121        }
122        for (i, element) in replace {
123            self.elements[i] = element;
124        }
125        Ok(())
126    }
127
128    /// Parse the configuration from a TOML string (load the data from file yourself).
129    pub fn from_toml_str(tomlstr: &str) -> Result<Self, String> {
130        let mut config: Self = toml::from_str(tomlstr).map_err(|e| format!("{}", e))?;
131        config.resolve_baseelements().map_err(|e| format!("{}", e))?;
132        Ok(config)
133    }
134
135    pub fn with_debug(mut self, value: bool) -> Self {
136        self.debug = value;
137        self
138    }
139
140    /// Add provenance information pointing each annotation to the appropriate node in the XML source files where it came from (translates into XPathSelector in Web Annotation output)
141    pub fn with_provenance(mut self, value: bool) -> Self {
142        self.provenance = value;
143        self
144    }
145
146    /// Register an XML namespace with prefix
147    pub fn with_prefix(mut self, prefix: impl Into<String>, namespace: impl Into<String>) -> Self {
148        self.namespaces.insert(prefix.into(), namespace.into());
149        self
150    }
151
152    /// A prefix to assign when setting annotation IDs, within this string you can use the special variable `{resource}` to use the resource ID.
153    pub fn with_id_prefix(mut self, prefix: impl Into<String>) -> Self {
154        self.id_prefix = Some(prefix.into());
155        self
156    }
157
158    /// A suffix to strip when assigning annotation IDs
159    pub fn with_id_strip_suffix(mut self, suffix: impl Into<String>) -> Self {
160        self.id_strip_suffix.push(suffix.into());
161        self
162    }
163
164    /// Inject a DTD (for XML entity resolution)
165    pub fn with_inject_dtd(mut self, dtd: impl Into<String>) -> Self {
166        self.inject_dtd = Some(dtd.into());
167        self
168    }
169
170    /// Set default whitespace handling
171    pub fn with_whitespace(mut self, handling: XmlWhitespaceHandling) -> Self {
172        self.whitespace = handling;
173        self
174    }
175
176    /// Set an element configuration
177    pub fn with_element<F>(mut self, expression: &str, setup: F) -> Self
178    where
179        F: Fn(XmlElementConfig) -> XmlElementConfig,
180    {
181        let expression = XPathExpression::new(expression);
182        let element = setup(XmlElementConfig::new(expression));
183        if self.debug {
184            eprintln!("[STAM fromxml] registered {:?}", element);
185        }
186        self.elements.push(element);
187        self
188    }
189
190    /// How to handle this element?
191    fn element_config(&self, node: Node, path: &NodePath) -> Option<&XmlElementConfig> {
192        for elementconfig in self.elements.iter().rev() {
193            if elementconfig.path.test(path, node, self) {
194                return Some(elementconfig);
195            }
196        }
197        None
198    }
199
200    pub fn add_context(&mut self, key: impl Into<String>, value: toml::Value) {
201        self.context.insert(key.into(), value);
202    }
203
204    pub fn debug(&self) -> bool {
205        self.debug
206    }
207}
208
209#[derive(Clone, Copy, Debug, PartialEq, Deserialize)]
210/// Determines how to handle whitespace for an XML element
211pub enum XmlWhitespaceHandling {
212    /// Not specified (used for base templates)
213    Unspecified,
214    //Inherit from parent
215    Inherit,
216    /// Whitespace is kept as is in the XML
217    Preserve,
218    /// all whitespace becomes space, consecutive whitespace is squashed
219    Collapse,
220}
221
222impl Default for XmlWhitespaceHandling {
223    fn default() -> Self {
224        XmlWhitespaceHandling::Unspecified
225    }
226}
227
228impl XmlWhitespaceHandling {
229    fn collapse() -> Self {
230        XmlWhitespaceHandling::Collapse
231    }
232}
233
#[derive(Debug, Clone, Deserialize, PartialEq, Copy, Default)]
/// Determines whether, and on what target, an annotation is made for a matched item
pub enum XmlAnnotationHandling {
    /// Not specified. This is the default value; when an element derives from a base
    /// element, an unspecified handling is filled in from the base.
    #[default]
    Unspecified,

    /// No annotation
    None,

    /// Selects the text pertaining to the current element
    TextSelector,

    /// Selects the text pertaining to the current resource
    ResourceSelector,

    /// Selects the text between the current element and the next instance of the same element type
    TextSelectorBetweenMarkers,
}
252
#[derive(Debug, Clone, Deserialize)]
/// XML Element configuration, determines how to map an XML element (identified by an XPath expression) to STAM
pub struct XmlElementConfig {
    /// This is an XPath-like expression (just a small subset of XPath) to identify an
    /// element by its path. When omitted it defaults to `*`.

    #[serde(default)]
    path: XPathExpression,

    /// What kind of annotation (if any) to make for this element
    #[serde(default)]
    annotation: XmlAnnotationHandling,

    /// The data to associate with the annotation
    #[serde(default)]
    annotationdata: Vec<XmlAnnotationDataConfig>,

    /// Template or None for no text handling, prefixes are never targeted by annotations
    #[serde(default)]
    textprefix: Option<String>,

    /// Extract text. None means unspecified and defaults to false.
    #[serde(default)]
    text: Option<bool>,

    /// Template or None for no text handling, suffixes are never targeted by annotations
    #[serde(default)]
    textsuffix: Option<String>,

    /// Annotation data for the text prefix
    #[serde(default)]
    annotatetextprefix: Vec<XmlAnnotationDataConfig>,

    /// Annotation data for the text suffix
    #[serde(default)]
    annotatetextsuffix: Vec<XmlAnnotationDataConfig>,

    /// Include the text prefix in the annotation's text selector. None means unspecified and defaults to false
    #[serde(default)]
    include_textprefix: Option<bool>,

    /// Include the text suffix in the annotation's text selector. None means unspecified and defaults to false
    #[serde(default)]
    include_textsuffix: Option<bool>,

    /// Base elements to derive from (names of entries in the configuration's `baseelements`)
    #[serde(default)]
    base: Vec<String>,

    /// Template or None for no ID extraction
    #[serde(default)]
    id: Option<String>,

    #[serde(default)]
    /// Stop at this element (`true`) instead of descending into its children (`false`).
    /// None means unspecified and defaults to false
    stop: Option<bool>,

    #[serde(default)]
    /// Whitespace handling for this element
    whitespace: XmlWhitespaceHandling,

    #[serde(default)]
    /// Assigns a scope id to this text range, it can later be referenced to constrain marker based annotation via `marker_scope`
    scope_id: Option<String>,

    #[serde(default)]
    /// If annotation handling is TextSelectorBetweenMarkers, this sets a scope so the last marker won't transcend (otherwise you get all text of the document)
    /// The scope refers to the `scope_id` of another element that was used in text extraction.
    marker_scope: Option<String>,

}
321
322impl XmlElementConfig {
323    fn new(expression: XPathExpression) -> Self {
324        Self {
325            path: expression,
326            stop: None,
327            whitespace: XmlWhitespaceHandling::Unspecified,
328            annotation: XmlAnnotationHandling::Unspecified,
329            annotationdata: Vec::new(),
330            base: Vec::new(),
331            id: None,
332            textprefix: None,
333            text: None,
334            textsuffix: None,
335            annotatetextprefix: Vec::new(),
336            annotatetextsuffix: Vec::new(),
337            include_textprefix: None,
338            include_textsuffix: None,
339            scope_id: None,
340            marker_scope: None,
341        }
342    }
343
344    pub fn update(&mut self, base: &XmlElementConfig) {
345        if self.whitespace == XmlWhitespaceHandling::Unspecified
346            && base.whitespace != XmlWhitespaceHandling::Unspecified
347        {
348            self.whitespace = base.whitespace;
349        }
350        if self.annotation == XmlAnnotationHandling::Unspecified
351            && base.annotation != XmlAnnotationHandling::Unspecified
352        {
353            self.annotation = base.annotation;
354        }
355        if self.textprefix.is_none() && base.textprefix.is_some() {
356            self.textprefix = base.textprefix.clone();
357        }
358        if self.text.is_none() && base.text.is_some() {
359            self.text = base.text;
360        }
361        if self.textsuffix.is_none() && base.textsuffix.is_some() {
362            self.textsuffix = base.textsuffix.clone();
363        }
364        if self.id.is_none() && base.id.is_some() {
365            self.id = base.id.clone();
366        }
367        if self.stop.is_none() && base.stop.is_some() {
368            self.stop = base.stop;
369        }
370        for annotationdata in base.annotationdata.iter() {
371            if !self.annotationdata.contains(annotationdata) {
372                self.annotationdata.push(annotationdata.clone());
373            }
374        }
375        if self.annotatetextsuffix.is_empty() && !base.annotatetextsuffix.is_empty() {
376            self.annotatetextsuffix = base.annotatetextsuffix.clone();
377        }
378        if self.annotatetextprefix.is_empty() && !base.annotatetextprefix.is_empty() {
379            self.annotatetextprefix = base.annotatetextprefix.clone();
380        }
381        if self.include_textsuffix.is_none() {
382            self.include_textsuffix = base.include_textsuffix;
383        }
384        if self.include_textprefix.is_none() {
385            self.include_textprefix = base.include_textprefix;
386        }
387    }
388
389
390    /// This sets the mode that determines how the element is handledhttps://www.youtube.com/watch?v=G_BrbhRrP6g
391    pub fn with_stop(mut self, stop: bool) -> Self {
392        self.stop = Some(stop);
393        self
394    }
395
396    /// This sets the whitespace handling for this element
397    pub fn with_whitespace(mut self, handling: XmlWhitespaceHandling) -> Self {
398        self.whitespace = handling;
399        self
400    }
401
402    pub fn with_text(mut self, text: bool) -> Self {
403        self.text = Some(text);
404        self
405    }
406
407    pub fn with_base(mut self, iter: impl Iterator<Item = impl Into<String>>) -> Self {
408        self.base = iter.into_iter().map(|s| s.into()).collect();
409        self
410    }
411
412    pub fn without_text(mut self) -> Self {
413        self.text = None;
414        self
415    }
416
417    pub fn with_annotation(mut self, annotation: XmlAnnotationHandling) -> Self {
418        self.annotation = annotation;
419        self
420    }
421
422    /// Not a very safe hash function (just uses an address uniquely associated with this object) but works for our ends
423    fn hash(&self) -> usize {
424        self.path.0.as_ptr() as usize
425    }
426}
427
428impl PartialEq for XmlElementConfig {
429    fn eq(&self, other: &Self) -> bool {
430        self.hash() == other.hash()
431    }
432}
433
#[derive(Debug, Clone, Deserialize, PartialEq)]
/// Configures one item of annotation data (id, set, key and value); all string
/// settings are templates.
pub struct XmlAnnotationDataConfig {
    /// Template for the ID
    id: Option<String>,
    /// Template for the set
    set: Option<String>,
    /// Template for the key
    key: Option<String>,
    /// Any string values are interpreted as templates
    value: Option<toml::Value>,

    /// The type of the value, will be automatically detected if not set.
    #[serde(default)]
    valuetype: Option<String>,

    /// Allow value templates that yield an empty string?
    #[serde(default)]
    allow_empty_value: bool,

    /// Skip this data entirely if any underlying variables in the templates are undefined
    #[serde(default)]
    skip_if_missing: bool,


    /// If the value is a list, convert it to multiple annotationdata instances with the same key, one for each of the values
    #[serde(default)]
    multiple: bool,
}
462
463impl XmlAnnotationDataConfig {
464    pub fn with_id(mut self, id: impl Into<String>) -> Self {
465        self.id = Some(id.into());
466        self
467    }
468
469    pub fn with_set(mut self, set: impl Into<String>) -> Self {
470        self.set = Some(set.into());
471        self
472    }
473
474    pub fn with_key(mut self, key: impl Into<String>) -> Self {
475        self.key = Some(key.into());
476        self
477    }
478
479    pub fn with_value(mut self, value: impl Into<toml::Value>) -> Self {
480        self.value = Some(value.into());
481        self
482    }
483}
484
/// Not really full XPath, just a very minor subset: `/`-separated segments, each an
/// optionally prefixed element name (or `*`), optionally followed by a condition in
/// square brackets.
#[derive(Debug, Clone, PartialEq, Deserialize)]
struct XPathExpression(String);
488
489impl XPathExpression {
490    pub fn new(expression: impl Into<String>) -> Self {
491        Self(expression.into())
492    }
493
494    pub fn any() -> Self {
495        Self("*".into())
496    }
497
498    pub fn iter<'a>(
499        &'a self,
500        config: &'a XmlConversionConfig,
501    ) -> impl Iterator<Item = (Option<&'a str>, &'a str, Option<&'a str>)> {
502        self.0.trim_start_matches('/').split("/").map(|segment| {
503            //eprintln!("DEBUG: segment={}", segment);
504            let (prefix, name, condition) = Self::parse_segment(segment);
505            let namespace = if let Some(prefix) = prefix {
506                if let Some(namespace) = config.namespaces.get(prefix).map(|x| x.as_str()) {
507                    Some(namespace)
508                } else {
509                    panic!(
510                        "XML namespace prefix not known in configuration: {}",
511                        prefix
512                    );
513                }
514            } else {
515                None
516            };
517            (namespace, name, condition)
518        })
519    }
520
521    /// matches a node path against an XPath-like expression
522    fn test<'a, 'b>(&self, path: &NodePath<'a, 'b>, node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
523        let refiter = self.iter(config).collect::<Vec<_>>().into_iter().rev();
524        let pathiter = path.components.iter().rev();
525        self.test_withiter(refiter, pathiter, node, config)
526    }
527
528    /// matches a node path against an XPath-like expression
529    fn test_withiter<'a, 'b>(&self, mut refiter: impl Iterator<Item=(Option<&'a str>, &'a str, Option<&'a str>)> + Clone, mut pathiter: impl Iterator<Item=&'a NodePathComponent<'a, 'b>> + Clone, mut node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
530        while let Some((refns, refname, condition)) = refiter.next() {
531            if refns.is_none() && refname == "" && condition.is_none() {
532                // This is a `//` selector, we bifurcate here so we match both in case SOMETHING matches as well as when NOTHING matches, the recursion covers the latter logic route
533                if self.test_withiter(refiter.clone(), pathiter.clone(), node, config) {
534                    return true;
535                }
536            }
537            if let Some(component) = pathiter.next() {
538                /*if config.debug() {
539                    eprintln!("[STAM fromxml]          testing component {:?} against refns={:?} refname={} condition={:?}", component, refns, refname, condition);
540                }*/
541                if refname != "" && refname != "*" {
542                    if refns.is_none() != component.namespace.is_none() || component.namespace != refns || refname != component.tagname {
543                        return false;
544                    }
545                }
546                if let Some(condition) = condition {
547                    if !self.test_condition(condition, node, config) {
548                        return false;
549                    }
550                }
551                if let Some(parent) = node.parent() {
552                    node = parent;
553                }
554            } else {
555                if refname != "" {
556                    return false;
557                }
558            }
559        }
560        /* if config.debug() {
561            eprintln!("[STAM fromxml]          match");
562        }*/
563        true
564    }
565
566
567    fn test_condition<'a,'b>(&self, condition: &'a str, node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
568        for condition in condition.split(" and ") { //MAYBE TODO: doesn't take literals into account yet!
569            if let Some(pos) = condition.find("!=") {
570                let var = &condition[..pos];
571                let right = condition[pos+2..].trim_matches('"');
572                if self.get_var(var, &node, config) == Some(right) {
573                    return false;
574                }
575            } else if let Some(pos) = condition.find("=") {
576                let var = &condition[..pos];
577                let right = condition[pos+1..].trim_matches('"');
578                let value = self.get_var(var, &node, config);
579                if value != Some(right) {
580                    return false;
581                }
582            } else {
583                //condition is one variable and merely needs to exist
584                let v = self.get_var(condition, &node, config);
585                if v.is_none() || v == Some("") {
586                    return false;
587                }
588            }
589        }
590        /*if config.debug() {
591            eprintln!("[STAM fromxml]          condition matches");
592        }*/
593        true
594    }
595
596    /// Resolve a variable from a conditional expression, given a variable name, node and config
597    fn get_var<'a,'b>(&self, var: &str, node: &Node<'a,'b>, config: &XmlConversionConfig) -> Option<&'a str> { 
598        if var.starts_with("@") {
599            if let Some(pos) = var.find(":") {
600                let prefix = &var[1..pos];
601                if let Some(ns) = config.namespaces.get(prefix) {
602                    let var = &var[pos+1..];
603                    node.attribute((ns.as_str(),var))
604                } else {
605                    None
606                }
607            } else {
608                node.attribute(&var[1..])
609            }
610        } else if var == "text()" {
611            node.text().map(|s|s.trim())
612        } else {
613            None
614        }
615    }
616
617    /// Parses a segment into a namespace-prefix, a name and a condition
618    fn parse_segment<'a>(s: &'a str) -> (Option<&'a str>, &'a str, Option<&'a str>) {
619        let (name, condition) = if let (Some(begin), Some(end)) = (s.find("["), s.rfind("]")) {
620            (&s[..begin], Some(&s[begin + 1..end]))
621        } else {
622            (s, None)
623        };
624        if let Some((prefix, name)) = name.split_once(":") {
625            (Some(prefix), name, condition)
626        } else {
627            (None, name, condition)
628        }
629    }
630}
631
632
633
634impl Default for XPathExpression {
635    fn default() -> Self {
636        Self::any()
637    }
638}
639
/// One step in a `NodePath`: a single element, identified by namespace and tag name
#[derive(Clone, Debug, PartialEq)]
struct NodePathComponent<'a,'b> {
    /// Namespace URI of the element, if it has one
    namespace: Option<&'a str>,
    /// Local tag name of the element (without prefix)
    tagname: &'b str,
    /// Index sequence number, 1-indexed (as specified by XPath)
    index: Option<usize>,
}
647
/// The path to a node, as the sequence of elements leading to it (outermost first)
#[derive(Clone, Debug, PartialEq, Default)]
struct NodePath<'a, 'b> {
    /// The steps of the path; the last one is the node itself
    components: Vec<NodePathComponent<'a,'b>>,
}
652
653impl<'a, 'b> Display for NodePath<'a, 'b> {
654    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
655        for component in self.components.iter() {
656            write!(f, "/")?;
657            if let Some(ns) = component.namespace {
658                if let Some(index) = component.index {
659                    write!(f, "{{{}}}{}[{}]", ns, component.tagname, index)?;
660                } else {
661                    write!(f, "{{{}}}{}", ns, component.tagname)?;
662                }
663            } else {
664                if let Some(index) = component.index {
665                    write!(f, "{}[{}]", component.tagname, index)?;
666                } else {
667                    write!(f, "{}", component.tagname)?;
668                }
669            }
670        }
671        Ok(())
672    }
673}
674
675impl<'a,'b> NodePath<'a,'b> {
676    fn add(&mut self, node: &Node<'a,'b>, index: Option<usize>) {
677        if node.tag_name().name() != "" {
678            self.components.push(
679                NodePathComponent {
680                    namespace: node.tag_name().namespace(),
681                    tagname: node.tag_name().name(),
682                    index,
683                }
684            )
685        }
686    }
687
688    fn format_as_xpath(&self, prefixes: &HashMap<String, String>) -> String {
689        let mut out = String::new();
690        for component in self.components.iter() {
691            out.push('/');
692            if let Some(ns) = component.namespace {
693                if let Some(prefix) = prefixes.get(ns) {
694                    if let Some(index) = component.index {
695                        out += &format!("{}:{}[{}]", prefix, component.tagname, index);
696                    } else {
697                        out += &format!("{}:{}", prefix, component.tagname);
698                    }
699                } else {
700                    eprintln!("STAM fromxml WARNING: format_as_xpath: namespace {} not defined, no prefix found!", ns);
701                    if let Some(index) = component.index {
702                        out += &format!("{}[{}]", component.tagname, index);
703                    } else {
704                        out += &format!("{}", component.tagname);
705                    }
706                }
707            } else {
708                if let Some(index) = component.index {
709                    out += &format!("{}[{}]", component.tagname, index);
710                } else {
711                    out += &format!("{}", component.tagname);
712                }
713            }
714        }
715        out
716    }
717}
718
719
/// Counts elder siblings, used to determine index values
#[derive(Default,Debug)]
struct SiblingCounter {
    /// Number of nodes counted so far, per tag name (keyed by the debug representation
    /// of the tag name)
    map: HashMap<String,usize>,
}
725
726impl SiblingCounter {
727    fn count<'a,'b>(&mut self, node: &Node<'a,'b>) -> usize {
728        let s = format!("{:?}", node.tag_name());
729        *self.map.entry(s).and_modify(|c| {*c += 1;}).or_insert(1)
730    }
731}
732
733
#[derive(Debug, Clone, Deserialize)]
/// Metadata configuration: describes an annotation that is not tied to an XML element
/// (there is no path expression here, unlike in `XmlElementConfig`).
pub struct MetadataConfig {
    /// What kind of annotation (if any) to make
    #[serde(default)]
    annotation: XmlAnnotationHandling,

    /// The data to associate with the annotation
    #[serde(default)]
    annotationdata: Vec<XmlAnnotationDataConfig>,

    /// Template or None for no ID extraction
    #[serde(default)]
    id: Option<String>,
}
748
749/// Translate an XML file to STAM, given a particular configuration
750pub fn from_xml<'a>(
751    filename: &Path,
752    config: &XmlConversionConfig,
753    store: &'a mut AnnotationStore,
754) -> Result<(), String> {
755    if config.debug {
756        eprintln!("[STAM fromxml] parsing {}", filename.display());
757    }
758
759    // Read the raw XML data
760    let mut xmlstring = read_to_string(filename)
761        .map_err(|e| format!("Error opening XML file {}: {}", filename.display(), e))?;
762
763    //patchy: remove HTML5 doctype and inject our own
764    if xmlstring[..100].find("<!DOCTYPE html>").is_some() && config.inject_dtd.is_some() {
765        xmlstring = xmlstring.replacen("<!DOCTYPE html>", "", 1);
766    }
767
768    // we can only inject a DTD if there is no doctype
769    if xmlstring[..100].find("<!DOCTYPE").is_none() {
770        if let Some(dtd) = config.inject_dtd.as_ref() {
771            xmlstring = dtd.to_string() + &xmlstring
772        };
773    } else if config.inject_dtd.is_some() {
774        eprintln!("[STAM fromxml] WARNING: Can not inject DTD because file already has a DOCTYPE");
775    }
776
777    // parse the raw XML data into a DOM
778    let doc = Document::parse_with_options(
779        &xmlstring,
780        ParsingOptions {
781            allow_dtd: true,
782            ..ParsingOptions::default()
783        },
784    )
785    .map_err(|e| format!("Error parsing XML file {}: {}", filename.display(), e))?;
786
787    let mut converter = XmlToStamConverter::new(config);
788    converter
789        .compile()
790        .map_err(|e| format!("Error compiling templates: {}", e))?;
791
792    let textoutfilename = format!(
793        "{}.txt",
794        filename
795            .file_stem()
796            .expect("invalid filename")
797            .to_str()
798            .expect("invalid utf-8 in filename")
799    );
800
801    // extract text (first pass)
802    let mut path = NodePath::default();
803    path.add(&doc.root_element(), None);
804    converter
805        .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(textoutfilename.as_str()), Some(&filename.to_string_lossy()), 0)
806        .map_err(|e| {
807            format!(
808                "Error extracting element text from {}: {}",
809                filename.display(),
810                e
811            )
812        })?;
813    if config.debug {
814        eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
815    }
816    let resource = TextResourceBuilder::new()
817        .with_id(filename_to_id(textoutfilename.as_str(), config).to_string())
818        .with_text(converter.text.clone())
819        .with_filename(&textoutfilename);
820
821    converter.resource_handle = Some(
822        store
823            .add_resource(resource)
824            .map_err(|e| format!("Failed to add resource {}: {}", &textoutfilename, e))?,
825    );
826
827    converter.add_metadata(store).map_err(|e| format!("Failed to add metadata {}: {}", &textoutfilename, e))?;
828
829    // extract annotations (second pass)
830    converter
831        .extract_element_annotation(doc.root_element(), &path,  Some(&filename.to_string_lossy()),0,  store)
832        .map_err(|e| {
833            format!(
834                "Error extracting element annotation from {}: {}",
835                filename.display(),
836                e
837            )
838        })?;
839
840    Ok(())
841}
842
843/// Translate an XML file to STAM, given a particular configuration. This translates multiple XML files to a single output file.
844pub fn from_multi_xml<'a>(
845    filenames: &Vec<&Path>,
846    outputfile: Option<&Path>,
847    config: &XmlConversionConfig,
848    store: &'a mut AnnotationStore,
849) -> Result<(), String> {
850
851    let textoutfilename = if let Some(outputfile) = outputfile {
852        format!("{}",outputfile.to_str().expect("invalid utf-8 in filename"))
853    } else {
854        format!(
855            "{}.txt",
856                filenames.iter().next().expect("1 or more filename need to be provided")
857                .file_stem()
858                .expect("invalid filename")
859                .to_str()
860                .expect("invalid utf-8 in filename")
861        )
862    };
863
864    // Read the raw XML data
865    let mut xmlstrings: Vec<String> = Vec::new();
866    let mut docs: Vec<Document> = Vec::new();
867    for filename in filenames.iter() {
868        if config.debug {
869            eprintln!("[STAM fromxml] parsing {} (one of multiple)", filename.display());
870        }
871        //patchy: remove HTML5 doctype and inject our own
872        let mut xmlstring = read_to_string(filename).map_err(|e| format!("Error opening XML file {}: {}", filename.display(), e))?;
873        if xmlstring[..100].find("<!DOCTYPE html>").is_some() && config.inject_dtd.is_some() {
874            xmlstring = xmlstring.replacen("<!DOCTYPE html>", "", 1);
875        }
876        // we can only inject a DTD if there is no doctype
877        if xmlstring[..100].find("<!DOCTYPE").is_none() {
878            if let Some(dtd) = config.inject_dtd.as_ref() {
879                xmlstring = dtd.to_string() + &xmlstring
880            };
881        } else if config.inject_dtd.is_some() {
882            eprintln!("[STAM fromxml] WARNING: Can not inject DTD because file already has a DOCTYPE");
883        }
884        xmlstrings.push(xmlstring);
885    }
886
887    for (filename, xmlstring) in filenames.iter().zip(xmlstrings.iter()) {
888        // parse the raw XML data into a DOM
889        let doc = Document::parse_with_options(
890            xmlstring,
891            ParsingOptions {
892                allow_dtd: true,
893                ..ParsingOptions::default()
894            },
895        )
896        .map_err(|e| format!("Error parsing XML file {}: {}", filename.display(), e))?;
897        docs.push(doc);
898    }
899
900    let mut converter = XmlToStamConverter::new(config);
901    converter
902        .compile()
903        .map_err(|e| format!("Error compiling templates: {}", e))?;
904
905    for (i, (doc, filename)) in docs.iter().zip(filenames.iter()).enumerate() {
906        let mut path = NodePath::default();
907        path.add(&doc.root_element(), None);
908        // extract text (first pass)
909        converter
910            .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(textoutfilename.as_str()), Some(&filename.to_string_lossy()), i)
911            .map_err(|e| {
912                format!(
913                    "Error extracting element text from {}: {}",
914                    filename.display(),
915                    e
916                )
917            })?;
918        if config.debug {
919            eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
920        }
921    }
922
923    let resource = TextResourceBuilder::new()
924        .with_id(filename_to_id(textoutfilename.as_str(), config).to_string())
925        .with_text(converter.text.clone())
926        .with_filename(&textoutfilename);
927
928    converter.resource_handle = Some(
929        store
930            .add_resource(resource)
931            .map_err(|e| format!("Failed to add resource {}: {}", &textoutfilename, e))?,
932    );
933
934    converter.add_metadata(store).map_err(|e| format!("Failed to add metadata {}: {}", &textoutfilename, e))?;
935
936    // extract annotations (second pass)
937    for (i,(doc, filename)) in docs.iter().zip(filenames.iter()).enumerate() {
938        let mut path = NodePath::default();
939        path.add(&doc.root_element(), None);
940        converter
941            .extract_element_annotation(doc.root_element(), &path, Some(&filename.to_string_lossy()),i,  store)
942            .map_err(|e| {
943                format!(
944                    "Error extracting element annotation from {}: {}",
945                    filename.display(),
946                    e
947                )
948            })?;
949    }
950
951    Ok(())
952}
953
954/// Translate an XML file to STAM, given a particular configuration. Not writing output files and keeping all in memory. Does not support DTD injection.
955pub fn from_xml_in_memory<'a>(
956    resource_id: &str, 
957    xmlstring: &str,
958    config: &XmlConversionConfig,
959    store: &'a mut AnnotationStore,
960) -> Result<(), String> {
961    if config.debug {
962        eprintln!("[STAM fromxml] parsing XML string");
963    }
964
965    // parse the raw XML data into a DOM
966    let doc = Document::parse_with_options(
967        &xmlstring,
968        ParsingOptions {
969            allow_dtd: true,
970            ..ParsingOptions::default()
971        },
972    )
973    .map_err(|e| format!("Error parsing XML string: {}",  e))?;
974
975    let mut converter = XmlToStamConverter::new(config);
976    converter
977        .compile()
978        .map_err(|e| format!("Error compiling templates: {}", e))?;
979
980    let mut path = NodePath::default();
981    path.add(&doc.root_element(), None);
982    // extract text (first pass)
983    converter
984        .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(resource_id), Some(resource_id), 0)
985        .map_err(|e| {
986            format!(
987                "Error extracting element text from {}: {}",
988                resource_id,
989                e
990            )
991        })?;
992    if config.debug {
993        eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
994    }
995    let resource = TextResourceBuilder::new()
996        .with_id(resource_id)
997        .with_text(converter.text.clone());
998
999    converter.resource_handle = Some(
1000        store
1001            .add_resource(resource)
1002            .map_err(|e| format!("Failed to add resource {}: {}", &resource_id, e))?,
1003    );
1004
1005    converter.add_metadata(store).map_err(|e| format!("Failed to add metadata for {}: {}", &resource_id, e))?;
1006
1007    // extract annotations (second pass)
1008    converter
1009        .extract_element_annotation(doc.root_element(), &path, Some(resource_id), 0, store)
1010        .map_err(|e| {
1011            format!(
1012                "Error extracting element annotation from {}: {}",
1013                resource_id,
1014                e
1015            )
1016        })?;
1017
1018    Ok(())
1019}
1020
1021pub fn filename_to_id<'a>(filename: &'a str, config: &XmlConversionConfig) -> &'a str {
1022    for suffix in config.id_strip_suffix.iter() {
1023        if filename.ends_with(suffix) {
1024            return &filename[..filename.len() - suffix.len()];
1025        }
1026    }
1027    return filename;
1028}
1029
/// Discriminates between the kinds of positions recorded for a node; it is part of the key of
/// the converter's position maps.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum PositionType {
    /// Position of the element body (presumably the element's own text, verify against usage)
    Body,
    /// Position of the text prefix (presumably the text emitted by the `textprefix` template)
    TextPrefix,
    /// Position of the text suffix (presumably the text emitted by the `textsuffix` template)
    TextSuffix,
}
1036
/// Holds all state of a conversion from XML to STAM: the untangled text, the positions
/// recorded per node while extracting text, the template engine and the configuration.
struct XmlToStamConverter<'a> {
    /// The current character position the conversion process is at
    cursor: usize,

    /// The extracted plain-text after/during untangling
    text: String,

    /// The template engine
    template_engine: Engine<'a>,

    /// Keeps track of the new positions (unicode offset) where the node starts in the untangled document. The key consists of a document sequence number, a node ID and a position type.
    positionmap: HashMap<(usize,NodeId,PositionType), Offset>,

    /// Keeps track of the new positions (bytes offset) where the node starts in the untangled document. The key consists of a document sequence number, a node ID and a position type.
    bytepositionmap: HashMap<(usize,NodeId,PositionType), (usize, usize)>,

    /// Keeps track of markers (XML elements with `XmlAnnotationHandling::TextSelectorBetweenMarkers`), the key in this map is some hash of XmlElementConfig. The values are pairs of a document sequence number and a node ID.
    markers: HashMap<usize, Vec<(usize,NodeId)>>,

    /// Keeps track of scopes. These are used to find marker scopes. Only the last scope is registered. The key is an XPath expression. The value is a document sequence number and node ID which can subsequently be looked up in the position maps
    scopes: HashMap<String, (usize,NodeId)>,

    /// Handle of the text resource, `None` until the resource has been added to the store
    resource_handle: Option<TextResourceHandle>,

    /// Used to keep track of whether we need to insert a whitespace before actual text
    pending_whitespace: bool,

    /// The configuration
    config: &'a XmlConversionConfig,

    /// Namespace to prefix map (the inverse of the `namespaces` map in the configuration)
    prefixes: HashMap<String, String>,

    /// Global context for templates
    global_context: BTreeMap<String, upon::Value>,

    /// Variable names per template
    variables: BTreeMap<String, BTreeSet<&'a str>>,

    /// Indentation that is inserted in debug output
    debugindent: String,
}
1079
/// The errors that can occur during conversion from XML to STAM
pub enum XmlConversionError {
    /// An error from the STAM library
    StamError(StamError),
    /// An error related to a template. The string describes the template concerned (it may be empty), optionally followed by the underlying error from the template engine.
    TemplateError(String, Option<upon::Error>),
    /// An error in the configuration, with a message
    ConfigError(String),
}
1085
1086impl From<StamError> for XmlConversionError {
1087    fn from(error: StamError) -> Self {
1088        Self::StamError(error)
1089    }
1090}
1091
1092impl From<upon::Error> for XmlConversionError {
1093    fn from(error: upon::Error) -> Self {
1094        Self::TemplateError("".into(), Some(error))
1095    }
1096}
1097
1098impl Display for XmlConversionError {
1099    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1100        match self {
1101            Self::StamError(e) => e.fmt(f),
1102            Self::TemplateError(s, e) => {
1103                f.write_str(s.as_str())?;
1104                f.write_str(": ")?;
1105                if let Some(e) = e {
1106                    e.fmt(f)?;
1107                }
1108                f.write_str("")
1109            }
1110            Self::ConfigError(e) => e.fmt(f),
1111        }
1112    }
1113}
1114
1115impl<'a> XmlToStamConverter<'a> {
1116    fn new(config: &'a XmlConversionConfig) -> Self {
1117        let mut prefixes: HashMap<String, String> = HashMap::new();
1118        for (prefix, namespace) in config.namespaces.iter() {
1119            prefixes.insert(namespace.to_string(), prefix.to_string());
1120        }
1121        let mut template_engine = Engine::new();
1122        template_engine.set_default_formatter(&value_formatter); //this one serializes Lists like in JSON
1123        template_engine.add_function("capitalize", filter_capitalize);
1124        template_engine.add_function("lower", str::to_lowercase);
1125        template_engine.add_function("upper", str::to_uppercase);
1126        template_engine.add_function("trim", |s: &str| s.trim().to_string() );
1127        template_engine.add_function("add", filter_add);
1128        template_engine.add_function("sub", filter_sub);
1129        template_engine.add_function("mul", filter_mul);
1130        template_engine.add_function("div", filter_div);
1131        template_engine.add_function("eq", |a: &upon::Value, b: &upon::Value| a == b);
1132        template_engine.add_function("ne", |a: &upon::Value, b: &upon::Value| a != b);
1133        template_engine.add_function("gt", filter_gt);
1134        template_engine.add_function("lt", filter_lt);
1135        template_engine.add_function("gte", filter_gte);
1136        template_engine.add_function("lte", filter_lte);
1137        template_engine.add_function("int", |a: &upon::Value| match a {
1138            upon::Value::Integer(x) => upon::Value::Integer(*x),
1139            upon::Value::Float(x) => upon::Value::Integer(*x as i64), 
1140            upon::Value::String(s) => upon::Value::Integer(s.parse().expect("int filter expects an integer value")),
1141            _ => panic!("int filter expects an integer value"), //<< --^  TODO: PANIC IS WAY TO STRICT
1142        });
1143        template_engine.add_function("float", |a: &upon::Value| match a {
1144            upon::Value::Float(_) => a.clone(),
1145            upon::Value::Integer(x) => upon::Value::Float(*x as f64),
1146            upon::Value::String(s) => upon::Value::Float(s.parse().expect("float filter expects a float value")),
1147            _ => panic!("int filter expects an integer value"), //<< --^  TODO: PANIC IS WAY TO STRICT
1148        });
1149        template_engine.add_function("str", |a: upon::Value| match a {
1150            upon::Value::Integer(x) => upon::Value::String(format!("{}",x)),
1151            upon::Value::Float(x) => upon::Value::String(format!("{}",x)),
1152            upon::Value::Bool(x) => upon::Value::String(format!("{}",x)),
1153            upon::Value::String(_) => a,
1154            upon::Value::None => upon::Value::String(String::new()),
1155            upon::Value::List(list) => { //too much cloning but it'll do for now
1156                let newlist: Vec<String> = list.iter().map(|v| match v {
1157                    upon::Value::String(s) => s.clone(),
1158                    upon::Value::Integer(d) => format!("{}",d),
1159                    upon::Value::Float(d) => format!("{}",d),
1160                    upon::Value::Bool(d) => format!("{}",d),
1161                    _ => String::new(),
1162                }).collect();
1163                upon::Value::String(newlist.join(", "))
1164            },
1165            _ => panic!("map to string not implemented"), //<< --^  TODO: PANIC IS WAY TO STRICT
1166        });
1167        template_engine.add_function("as_range", |a: i64| upon::Value::List(std::ops::Range { start: 0, end: a }.into_iter().map(|x| upon::Value::Integer(x+1)).collect::<Vec<_>>()) );
1168        template_engine.add_function("last", |list: &[upon::Value]| list.last().map(Clone::clone));
1169        template_engine.add_function("first", |list: &[upon::Value]| {
1170            list.first().map(Clone::clone)
1171        });
1172        template_engine.add_function("tokenize", |s: &str| {
1173            upon::Value::List(
1174                s.split(|c| c == ' ' || c == '\n').filter_map(|x|
1175                    if !x.is_empty() {
1176                        Some(upon::Value::String(x.to_string()))
1177                    } else {
1178                        None
1179                    }
1180                )
1181                .collect::<Vec<upon::Value>>())
1182        });
1183        template_engine.add_function("replace", |s: &str, from: &str, to: &str| {
1184            upon::Value::String(s.replace(from,to))
1185        });
1186        template_engine.add_function("starts_with", |s: &str, prefix: &str| {
1187            s.starts_with(prefix)
1188        });
1189        template_engine.add_function("ends_with", |s: &str, suffix: &str| {
1190            s.ends_with(suffix)
1191        });
1192        template_engine.add_function("basename", |a: &upon::Value| match a {
1193            upon::Value::String(s) => upon::Value::String(s.split(|c| c == '/' || c == '\\').last().expect("splitting must work").to_string()),
1194            _ => panic!("basename filter expects a string value"), //<< --^  TODO: PANIC IS WAY TO STRICT
1195        });
1196        template_engine.add_function("noext", |a: &upon::Value| match a {
1197            upon::Value::String(s) => if let Some(pos) = s.rfind('.') {
1198                s[..pos].to_string()
1199            } else {
1200                s.to_string()
1201            },
1202            _ => panic!("basename filter expects a string value"), //<< --^  TODO: PANIC IS WAY TO STRICT
1203        });
1204        template_engine.add_function("join", |list: &upon::Value, delimiter: &str| match list {
1205            upon::Value::List(list) => { //too much cloning but it'll do for now
1206                let newlist: Vec<String> = list.iter().map(|v| match v {
1207                    upon::Value::String(s) => s.clone(),
1208                    upon::Value::Integer(d) => format!("{}",d),
1209                    upon::Value::Float(d) => format!("{}",d),
1210                    upon::Value::Bool(d) => format!("{}",d),
1211                    _ => String::new(),
1212                }).collect();
1213                upon::Value::String(newlist.join(delimiter))
1214            },
1215            _ => {
1216                list.clone() //was not really a list after all, just pass it on so we don't need to panic
1217            }
1218        });
1219        let mut converter = Self {
1220            cursor: 0,
1221            text: String::new(),
1222            template_engine,
1223            positionmap: HashMap::new(),
1224            bytepositionmap: HashMap::new(),
1225            scopes:  HashMap::new(),
1226            markers: HashMap::new(),
1227            resource_handle: None,
1228            pending_whitespace: false,
1229            global_context: BTreeMap::new(),
1230            debugindent: String::new(),
1231            variables: BTreeMap::new(),
1232            prefixes,
1233            config,
1234        };
1235        converter.set_global_context();
1236        converter.add_external_filters();
1237        converter
1238    }
1239
1240    fn add_external_filters(&mut self) {
1241        for filter in self.config.external_filters.clone() {
1242            self.template_engine.add_function(filter.name.clone(), move |value: &upon::Value| filter.run(value)  );
1243        }
1244    }
1245
1246    /// Compile templates
1247    fn compile(&mut self) -> Result<(), XmlConversionError> {
1248        if self.config.debug {
1249            eprintln!("[STAM fromxml] compiling templates");
1250        }
1251        for element in self.config.elements.iter() {
1252            if let Some(textprefix) = element.textprefix.as_ref() {
1253                if self.template_engine.get_template(textprefix.as_str()).is_none() {
1254                    let template = self.precompile(textprefix.as_str());
1255                    self.template_engine
1256                        .add_template(textprefix.clone(), template)
1257                        .map_err(|e| {
1258                            XmlConversionError::TemplateError(
1259                                format!("element/textprefix template {}", textprefix.clone()),
1260                                Some(e),
1261                            )
1262                        })?;
1263                }
1264            }
1265            if let Some(textsuffix) = element.textsuffix.as_ref() {
1266                if self.template_engine.get_template(textsuffix.as_str()).is_none() {
1267                    let template = self.precompile(textsuffix.as_str());
1268                    self.template_engine
1269                        .add_template(textsuffix.clone(), template)
1270                        .map_err(|e| {
1271                            XmlConversionError::TemplateError(
1272                                format!("element/textsuffix template {}", textsuffix.clone()),
1273                                Some(e),
1274                            )
1275                        })?;
1276                }
1277            }
1278            if let Some(id) = element.id.as_ref() {
1279                if self.template_engine.get_template(id.as_str()).is_none() {
1280                    let template = self.precompile(id.as_str());
1281                    self.template_engine.add_template(id.clone(), template).map_err(|e| {
1282                        XmlConversionError::TemplateError(
1283                            format!("element/id template {}", id.clone()),
1284                            Some(e),
1285                        )
1286                    })?;
1287                }
1288            }
1289            for annotationdata in element.annotationdata.iter().chain(element.annotatetextprefix.iter()).chain(element.annotatetextsuffix.iter()) {
1290                if let Some(id) = annotationdata.id.as_ref() {
1291                    if self.template_engine.get_template(id.as_str()).is_none() {
1292                        let template = self.precompile(id.as_str());
1293                        self.template_engine.add_template(id.clone(), template).map_err(|e| {
1294                            XmlConversionError::TemplateError(
1295                                format!("annotationdata/id template {}", id.clone()),
1296                                Some(e),
1297                            )
1298                        })?;
1299                    }
1300                }
1301                if let Some(set) = annotationdata.set.as_ref() {
1302                    if self.template_engine.get_template(set.as_str()).is_none() {
1303                        let template = self.precompile(set.as_str());
1304                        //eprintln!("------- DEBUG: {} -> {}", set.as_str(), template);
1305                        self.template_engine.add_template(set.clone(), template).map_err(|e| {
1306                            XmlConversionError::TemplateError(
1307                                format!("annotationdata/set template {}", set.clone()),
1308                                Some(e),
1309                            )
1310                        })?;
1311                    }
1312                }
1313                if let Some(key) = annotationdata.key.as_ref() {
1314                    if self.template_engine.get_template(key.as_str()).is_none() {
1315                        let template = self.precompile(key.as_str());
1316                        self.template_engine.add_template(key.clone(), template).map_err(|e| {
1317                            XmlConversionError::TemplateError(
1318                                format!("annotationdata/key template {}", key.clone()),
1319                                Some(e),
1320                            )
1321                        })?;
1322                    }
1323                }
1324                if let Some(value) = annotationdata.value.as_ref() {
1325                    self.compile_value(value)?;
1326                }
1327            }
1328        }
1329        for metadata in self.config.metadata.iter() {
1330            if let Some(id) = metadata.id.as_ref() {
1331                if self.template_engine.get_template(id.as_str()).is_none() {
1332                    let template = self.precompile(id.as_str());
1333                    self.template_engine.add_template(id.clone(), template).map_err(|e| {
1334                        XmlConversionError::TemplateError(
1335                            format!("metadata/id template {}", id.clone()),
1336                            Some(e),
1337                        )
1338                    })?;
1339                }
1340            }
1341            for annotationdata in metadata.annotationdata.iter() {
1342                if let Some(id) = annotationdata.id.as_ref() {
1343                    if self.template_engine.get_template(id.as_str()).is_none() {
1344                        let template = self.precompile(id.as_str());
1345                        self.template_engine.add_template(id.clone(), template).map_err(|e| {
1346                            XmlConversionError::TemplateError(
1347                                format!("annotationdata/id template {}", id.clone()),
1348                                Some(e),
1349                            )
1350                        })?;
1351                    }
1352                }
1353                if let Some(set) = annotationdata.set.as_ref() {
1354                    if self.template_engine.get_template(set.as_str()).is_none() {
1355                        let template = self.precompile(set.as_str());
1356                        //eprintln!("------- DEBUG: {} -> {}", set.as_str(), template);
1357                        self.template_engine.add_template(set.clone(), template).map_err(|e| {
1358                            XmlConversionError::TemplateError(
1359                                format!("annotationdata/set template {}", set.clone()),
1360                                Some(e),
1361                            )
1362                        })?;
1363                    }
1364                }
1365                if let Some(key) = annotationdata.key.as_ref() {
1366                    if self.template_engine.get_template(key.as_str()).is_none() {
1367                        let template = self.precompile(key.as_str());
1368                        self.template_engine.add_template(key.clone(), template).map_err(|e| {
1369                            XmlConversionError::TemplateError(
1370                                format!("annotationdata/key template {}", key.clone()),
1371                                Some(e),
1372                            )
1373                        })?;
1374                    }
1375                }
1376                if let Some(value) = annotationdata.value.as_ref() {
1377                    self.compile_value(value)?;
1378                }
1379            }
1380        }
1381        Ok(())
1382    }
1383
1384    /// Compile templates from a value, all strings are considered templates
1385    fn compile_value(&mut self, value: &'a toml::Value) -> Result<(), XmlConversionError> {
1386        match value {
1387            toml::Value::String(value) => {
1388                if self.template_engine.get_template(value.as_str()).is_none() {
1389                    let template = self.precompile(value.as_str());
1390                    self.template_engine.add_template(value.clone(), template).map_err(|e| {
1391                        XmlConversionError::TemplateError(
1392                            format!("annotationdata/value template {}", value.clone()),
1393                            Some(e),
1394                        )
1395                    })?;
1396                }
1397            }
1398            toml::Value::Table(map) => {
1399                for (_key, value) in map.iter() {
1400                    self.compile_value(value)?;
1401                }
1402            },
1403            toml::Value::Array(list) => {
1404                for value in list.iter() {
1405                    self.compile_value(value)?;
1406                }
1407            }
1408            _ => {} //no templates in other types
1409        }
1410        Ok(())
1411    }
1412
1413    /// untangle text, extract the text (and only the text)
1414    /// from an XML document, according to the
1415    /// mapping configuration and creates a STAM TextResource for it.
1416    /// Records exact offsets per element/node for later use during annotation extraction.
1417    fn extract_element_text<'b>(
1418        &mut self,
1419        node: Node<'a,'b>,
1420        path: &NodePath<'a,'b>,
1421        whitespace: XmlWhitespaceHandling,
1422        resource_id: Option<&str>,
1423        inputfile: Option<&str>,
1424        doc_num: usize,
1425    ) -> Result<(), XmlConversionError> {
1426        if self.config.debug {
1427            eprintln!("[STAM fromxml]{} extracting text for element {}", self.debugindent, path);
1428        }
1429        let mut begin = self.cursor; //current character pos marks the begin
1430        let mut bytebegin = self.text.len(); //current byte pos marks the begin
1431        let mut end_discount = 0; //the discount may be needed later if textsuffixes are outputted (which we do not want as part of the annotation)
1432        let mut end_bytediscount = 0;
1433        let mut firsttext = true; //tracks whether we have already outputted some text, needed for whitespace handling
1434
1435        let mut elder_siblings = SiblingCounter::default();
1436
1437        // obtain the configuration that applies to this element
1438        if let Some(element_config) = self.config.element_config(node, path) {
1439            if self.config.debug {
1440                eprintln!("[STAM fromxml]{} matching config: {:?}", self.debugindent, element_config);
1441            }
1442
1443            if (element_config.stop == Some(false) || element_config.stop.is_none())
1444                && element_config.annotation != XmlAnnotationHandling::TextSelectorBetweenMarkers
1445            {
1446                //do text extraction for this element
1447
1448                let whitespace = if node.has_attribute((NS_XML, "space")) {
1449                    // if there is an explicit xml:space attributes, it overrides whatever whitespace handling we have set:
1450                    match node.attribute((NS_XML, "space")).unwrap() {
1451                        "preserve" => XmlWhitespaceHandling::Preserve,
1452                        "collapse" | "replace" => XmlWhitespaceHandling::Collapse,
1453                        _ => whitespace,
1454                    }
1455                } else if element_config.whitespace == XmlWhitespaceHandling::Inherit
1456                    || element_config.whitespace == XmlWhitespaceHandling::Unspecified
1457                {
1458                    whitespace //from parent, i.e. passed to this (recursive) function by caller
1459                } else {
1460                    element_config.whitespace //default from the config
1461                };
1462
1463                // process the text prefix, a text template to include prior to the actual text
1464                self.process_textprefix(element_config, node, resource_id, inputfile, doc_num, &mut begin, &mut bytebegin)?;
1465
1466                let textbegin = self.cursor;
1467                // process all child elements
1468                for child in node.children() {
1469                    if self.config.debug {
1470                        eprintln!("[STAM fromxml]{} child {:?}", self.debugindent,child);
1471                        eprintln!("[STAM fromxml]{}  cursor={} begin={} textbegin={}", self.debugindent, self.cursor, begin, textbegin);
1472                    }
1473                    if child.is_text() && element_config.text == Some(true) {
1474                        // extract the actual element text
1475                        // this may trigger multiple times if the XML element (`node`) has mixed content
1476
1477                        let mut innertext = child.text().expect("text node must have text");
1478                        let mut pending_whitespace = false;
1479                        let mut leading_whitespace = false;
1480                        if whitespace == XmlWhitespaceHandling::Collapse && !innertext.is_empty() {
1481                            // analyse what kind of whitespace we are dealing with
1482                            let mut all_whitespace = true;
1483                            leading_whitespace = innertext.chars().next().unwrap().is_whitespace();
1484
1485                            // any pending whitespace after this element is 'buffered' in this boolean
1486                            // and only written out depending on the next text's whitespace situation
1487                            // it will later be assigned to self.pending_whitespace just before going next iteration
1488                            pending_whitespace = innertext
1489                                .chars()
1490                                .inspect(|c| {
1491                                    if !c.is_whitespace() {
1492                                        all_whitespace = false
1493                                    }
1494                                })
1495                                .last()
1496                                .unwrap()
1497                                .is_whitespace();
1498                            if all_whitespace {
1499                                self.pending_whitespace = true;
1500                                if self.config.debug {
1501                                    eprintln!(
1502                                        "[STAM fromxml]{} ^- all whitespace, flag pending whitespace and skipping...",
1503                                        self.debugindent,
1504                                    );
1505                                }
1506                                continue;
1507                            }
1508                            innertext = innertext.trim();
1509                            if self.config.debug {
1510                                eprintln!(
1511                                    "[STAM fromxml]{} ^- collapsed whitespace: {:?}",
1512                                    self.debugindent,
1513                                    innertext
1514                                );
1515                            }
1516                        }
1517                        if self.pending_whitespace || leading_whitespace {
1518                            //output any pending whitespace from the previous iteration, or leading whitespace from this one
1519                            if !self.text.is_empty()
1520                                && !self.text.chars().rev().next().unwrap().is_whitespace()
1521                            {
1522                                if self.config.debug {
1523                                    eprintln!("[STAM fromxml]{} ^- outputting pending whitespace",self.debugindent);
1524                                }
1525                                self.text.push(' ');
1526                                self.cursor += 1;
1527                                if firsttext && self.pending_whitespace {
1528                                    begin += 1;
1529                                    bytebegin += 1;
1530                                    if self.config.debug {
1531                                        eprintln!("[STAM fromxml]{}  firsttext, begin is now {}, cursor {}",self.debugindent, begin, self.cursor);
1532                                    }
1533                                }
1534                            }
1535                            self.pending_whitespace = false;
1536                        }
1537
1538                        // finally we output the actual text, and advance the cursor
1539                        if whitespace == XmlWhitespaceHandling::Collapse {
1540                            let mut prevc = ' ';
1541                            let mut innertext = innertext.replace(|c: char| c.is_whitespace(), " ");
1542                            innertext.retain(|c| {
1543                                let do_retain = c != ' ' || prevc != ' ';
1544                                prevc = c;
1545                                do_retain
1546                            });
1547                            self.text += &innertext;
1548                            self.cursor += innertext.chars().count();
1549                            if self.config.debug {
1550                                eprintln!("[STAM fromxml]{} ^- outputting text child (collapsed whitespace), cursor is now {}: {}",self.debugindent, self.cursor, innertext);
1551                            }
1552                        } else {
1553                            self.text += &innertext;
1554                            self.cursor += innertext.chars().count();
1555                            if self.config.debug {
1556                                eprintln!("[STAM fromxml]{} ^- outputting text child, cursor is now {}: {}",self.debugindent, self.cursor, innertext);
1557                            }
1558                        }
1559                        firsttext = self.cursor == textbegin;
1560
1561                        //set the pending whitespace buffer (bool) for next iteration
1562                        self.pending_whitespace = pending_whitespace;
1563                    } else if child.is_element() {
1564                        if self.config.debug {
1565                            eprintln!("[STAM fromxml]{} \\- extracting text for this child", self.debugindent);
1566                        }
1567                        self.debugindent.push_str("  ");
1568                        // recursion step, process child element, pass our whitespace handling mode since it may inherit it
1569                        let mut path = path.clone();
1570                        let count = elder_siblings.count(&child);
1571                        path.add(&child, Some(count));
1572                        self.extract_element_text(child, &path, whitespace, resource_id, inputfile, doc_num)?;
1573                        firsttext = self.cursor == textbegin;
1574                        self.debugindent.pop();
1575                        self.debugindent.pop();
1576                    } else {
1577                        if self.config.debug {
1578                            eprintln!("[STAM fromxml]{} ^- skipping this child node", self.debugindent);
1579                        }
1580                        continue;
1581                    }
1582                }
1583
1584                // process the text suffix, a preconfigured string of text to include after to the actual text
1585                self.process_textsuffix(element_config, node, resource_id, inputfile, doc_num, &mut end_discount, &mut end_bytediscount, textbegin)?;
1586
1587                // Assign a scope ID if provided (used for constraining the scope of marker annotations later on)
1588                if let Some(scope_id) = element_config.scope_id.as_ref() {
1589                    self.scopes.insert( scope_id.clone(), (doc_num, node.id()) );
1590                }
1591            } else if element_config.annotation == XmlAnnotationHandling::TextSelectorBetweenMarkers
1592            {
1593                // this is a marker, keep track of it so we can extract the span between markers in [`extract_element_annotation()`] later
1594                if self.config.debug {
1595                    eprintln!("[STAM fromxml]{} adding to markers (textprefix={:?}, textsuffix={:?})", self.debugindent, element_config.textprefix, element_config.textsuffix);
1596                }
1597
1598
1599                self.markers
1600                    .entry(element_config.hash())
1601                    .and_modify(|v| v.push((doc_num, node.id())))
1602                    .or_insert(vec![(doc_num, node.id())]);
1603
1604                // for markers it doesn't matter whether something text is defined as a prefix or suffix, it's functionally the same because a marker has no text itself
1605
1606                self.process_textprefix(element_config, node, resource_id, inputfile, doc_num, &mut begin, &mut bytebegin)?;
1607                self.process_textsuffix(element_config, node, resource_id, inputfile, doc_num, &mut end_discount, &mut end_bytediscount, self.cursor)?;
1608            }
1609        } else if self.config.debug {
1610            eprintln!(
1611                "[STAM fromxml]{} WARNING: no match, skipping text extraction for element {}",
1612                self.debugindent,
1613                path
1614            );
1615        }
1616
1617        // Last, we store the new text offsets for this element/node so
1618        // we can use it in [`extract_element_annotation()`] to associate
1619        // actual annotations with this span.
1620        if begin <= (self.cursor - end_discount) {
1621            let offset = Offset::simple(begin, self.cursor - end_discount);
1622            if self.config.debug {
1623                eprintln!(
1624                    "[STAM fromxml]{} extracted text for {} @{:?}: {:?}",
1625                    self.debugindent,
1626                    path,
1627                    &offset,
1628                    &self.text[bytebegin..(self.text.len() - end_bytediscount)]
1629                );
1630            }
1631            self.positionmap.insert((doc_num, node.id(), PositionType::Body), offset);
1632            self.bytepositionmap
1633                .insert((doc_num, node.id(), PositionType::Body), (bytebegin, self.text.len() - end_bytediscount));
1634        }
1635        Ok(())
1636    }
1637
1638    /// process the text prefix, a text template to include prior to the actual text
1639    fn process_textprefix<'b>(
1640        &mut self,
1641        element_config: &XmlElementConfig,
1642        node: Node<'a,'b>,
1643        resource_id: Option<&str>,
1644        inputfile: Option<&str>,
1645        doc_num: usize,
1646        begin: &mut usize,
1647        bytebegin: &mut usize
1648    ) -> Result<(), XmlConversionError> {
1649        if let Some(textprefix) = &element_config.textprefix {
1650            self.pending_whitespace = false;
1651            if self.config.debug {
1652                eprintln!("[STAM fromxml]{} outputting textprefix: {:?}", self.debugindent, textprefix);
1653            }
1654            let result =
1655                self.render_template(textprefix, &node, Some(self.cursor), None, resource_id, inputfile, doc_num)
1656                    .map_err(|e| match e {
1657                        XmlConversionError::TemplateError(s, e) => {
1658                            XmlConversionError::TemplateError(
1659                                format!(
1660                                "whilst rendering textprefix template '{}' for node '{}': {}",
1661                                textprefix, node.tag_name().name(), s
1662                            ),
1663                                e,
1664                            )
1665                        }
1666                        e => e,
1667                    })?;
1668            let result_charlen = result.chars().count();
1669
1670            if !element_config.annotatetextprefix.is_empty() {
1671                //record the offsets for textprefix annotation later
1672                let offset = Offset::simple(self.cursor, self.cursor + result_charlen);
1673                self.positionmap.insert((doc_num, node.id(), PositionType::TextPrefix), offset);
1674                self.bytepositionmap
1675                    .insert((doc_num, node.id(), PositionType::TextPrefix), (*bytebegin, *bytebegin + result.len()));
1676            }
1677
1678            self.cursor += result_charlen;
1679            self.text += &result;
1680
1681            if element_config.include_textprefix != Some(true) {
1682                // the textprefix will not be part of the annotation's text selection, increment the offsets:
1683                *begin += result_charlen;
1684                *bytebegin += result.len();
1685            }
1686        }
1687        Ok(())
1688    }
1689
1690    /// process the text suffix, a preconfigured string of text to include after to the actual text
1691    fn process_textsuffix<'b>(
1692        &mut self,
1693        element_config: &XmlElementConfig,
1694        node: Node<'a,'b>,
1695        resource_id: Option<&str>,
1696        inputfile: Option<&str>,
1697        doc_num: usize,
1698        end_discount: &mut usize,
1699        end_bytediscount: &mut usize,
1700        textbegin: usize,
1701    ) -> Result<(), XmlConversionError> {
1702        if let Some(textsuffix) = &element_config.textsuffix {
1703            if self.config.debug {
1704                eprintln!("[STAM fromxml]{} outputting textsuffix: {:?}", self.debugindent, textsuffix);
1705            }
1706            let result = self.render_template(
1707                textsuffix.as_str(),
1708                &node,
1709                Some(textbegin),
1710                Some(self.cursor),
1711                resource_id,
1712                inputfile,
1713                doc_num
1714            ).map_err(|e| match e {
1715                    XmlConversionError::TemplateError(s, e) => {
1716                        XmlConversionError::TemplateError(
1717                            format!(
1718                                "whilst rendering textsuffix template '{}' for node '{}': {}",
1719                                textsuffix,
1720                                node.tag_name().name(),
1721                                s
1722                            ),
1723                            e,
1724                        )
1725                    }
1726                    e => e,
1727            })?;
1728            let end_discount_tmp = result.chars().count();
1729            let end_bytediscount_tmp = result.len();
1730
1731
1732            self.text += &result;
1733
1734            if !element_config.annotatetextsuffix.is_empty() {
1735                //record the offsets for textsuffix annotation later
1736                let offset = Offset::simple(self.cursor, self.cursor + end_discount_tmp);
1737                self.positionmap.insert((doc_num, node.id(), PositionType::TextSuffix), offset);
1738                self.bytepositionmap
1739                    .insert((doc_num, node.id(), PositionType::TextSuffix), (self.text.len() - end_bytediscount_tmp, self.text.len()));
1740            }
1741
1742            self.cursor += end_discount_tmp;
1743            self.pending_whitespace = false;
1744
1745            if element_config.include_textsuffix == Some(true) {
1746                // the textsuffix will be part of the annotation's text selection, no discount for later
1747                *end_discount = 0;
1748                *end_bytediscount = 0;
1749            } else {
1750                // the textsuffix will not be part of the annotation's text selection, set discounts for later
1751                *end_discount = end_discount_tmp;
1752                *end_bytediscount = end_bytediscount_tmp;
1753            }
1754        }
1755        Ok(())
1756    }
1757
1758    /// extract annotations from the XML document
1759    /// according to the mapping configuration and creates a STAM TextResource for it.
1760    /// The text, for the full document, must have already been extracted earlier with [`extract_element_text()`].
1761    /// This relies on the exact offsets per element/node computed earlier during text extraction (`positionmap`).
1762    fn extract_element_annotation<'b>(
1763        &mut self,
1764        node: Node<'a,'b>,
1765        path: &NodePath<'a,'b>,
1766        inputfile: Option<&str>,
1767        doc_num: usize,
1768        store: &mut AnnotationStore,
1769    ) -> Result<(), XmlConversionError> {
1770        if self.config.debug {
1771            eprintln!("[STAM fromxml]{} extracting annotation from {}", self.debugindent, path);
1772        }
1773
1774        let mut elder_siblings = SiblingCounter::default();
1775
1776        // obtain the configuration that applies to this element
1777        if let Some(element_config) = self.config.element_config(node, &path) {
1778            if self.config.debug {
1779                eprintln!("[STAM fromxml]{} matching config: {:?}", self.debugindent, element_config);
1780            }
1781            if element_config.annotation != XmlAnnotationHandling::None
1782                && element_config.annotation != XmlAnnotationHandling::Unspecified
1783            {
1784                let mut builder = AnnotationBuilder::new();
1785
1786                //prepare variables to pass to the template context
1787                let offset = self.positionmap.get(&(doc_num, node.id(), PositionType::Body));
1788                if element_config.annotation == XmlAnnotationHandling::TextSelector {
1789                    if let Some((beginbyte, endbyte)) = self.bytepositionmap.get(&(doc_num, node.id(), PositionType::Body)) {
1790                        if self.config.debug {
1791                            eprintln!("[STAM fromxml]{} annotation covers text {:?} (bytes {}-{})", self.debugindent, offset, beginbyte, endbyte);
1792                        }
1793                    }  else if self.text.is_empty() {
1794                        return Err(XmlConversionError::ConfigError("Can't extract annotations on text if no text was extracted!".into()));
1795                    }
1796                }
1797                let begin = if let Some(offset) = offset {
1798                    if let Cursor::BeginAligned(begin) = offset.begin {
1799                        Some(begin)
1800                    } else {
1801                        None
1802                    }
1803                } else {
1804                    None
1805                };
1806                let end = if let Some(offset) = offset {
1807                    if let Cursor::BeginAligned(end) = offset.end {
1808                        Some(end)
1809                    } else {
1810                        None
1811                    }
1812                } else {
1813                    None
1814                };
1815
1816                let resource_id = if let Some(resource_handle) = self.resource_handle {
1817                    store.resource(resource_handle).unwrap().id()
1818                } else {
1819                    None
1820                };
1821
1822                let mut have_id = false;
1823                if let Some(template) = &element_config.id {
1824                    let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1825                    let compiled_template = self.template_engine.template(template.as_str());
1826                    let id = compiled_template.render(&context).to_string().map_err(|e| 
1827                            XmlConversionError::TemplateError(
1828                                format!(
1829                                    "whilst rendering id template '{}' for node '{}'",
1830                                    template,
1831                                    node.tag_name().name(),
1832                                ),
1833                                Some(e),
1834                            )
1835                        )?;
1836                    if !id.is_empty() {
1837                        builder = builder.with_id(id);
1838                        have_id = true;
1839                    }
1840                }
1841
1842                if !have_id {
1843                    //generate a random ID if we have none
1844                    if let Some(resource_id) = resource_id {
1845                        builder = builder.with_id(stam::generate_id(&format!("{}-",resource_id), ""));
1846                    } else {
1847                        builder = builder.with_id(stam::generate_id("", ""));
1848                    }
1849                }
1850
1851                builder = self.add_annotationdata_to_builder(element_config.annotationdata.iter(), builder, node.clone(), begin, end, resource_id, inputfile, doc_num)?;
1852
1853
1854                if self.config.provenance  && inputfile.is_some() {
1855                    let path_string = if let Some(id) = node.attribute((NS_XML,"id")) {
1856                        //node has an ID, use that
1857                        format!("//{}[@xml:id=\"{}\"]", self.get_node_name_for_xpath(&node), id)
1858                    } else {
1859                        //no ID, use full XPath expression
1860                        path.format_as_xpath(&self.prefixes)
1861                    };
1862                    let databuilder = AnnotationDataBuilder::new().with_dataset(CONTEXT_ANNO.into()).with_key("target".into()).with_value(
1863                        BTreeMap::from([
1864                            ("source".to_string(),inputfile.unwrap().into()),
1865                            ("selector".to_string(),
1866                                    BTreeMap::from([
1867                                        ("type".to_string(),"XPathSelector".into()),
1868                                        ("value".to_string(),path_string.into())
1869                                    ]).into()
1870                            )
1871                        ]).into()
1872                    );
1873                    builder = builder.with_data_builder(databuilder);
1874                }
1875
1876
1877                // Finish the builder and add the actual annotation to the store, according to its element handling
1878                match element_config.annotation {
1879                    XmlAnnotationHandling::TextSelector => {
1880                        // Annotation is on text, translates to TextSelector
1881                        if let Some(selector) = self.textselector(node, doc_num, PositionType::Body) {
1882                            builder = builder.with_target(selector);
1883                            if self.config.debug {
1884                                eprintln!("[STAM fromxml]   builder AnnotateText: {:?}", builder);
1885                            }
1886                            store.annotate(builder)?;
1887                        }
1888                        if !element_config.annotatetextprefix.is_empty() || !element_config.annotatetextsuffix.is_empty() {
1889                            self.annotate_textaffixes(node, element_config, inputfile, doc_num, store)?;
1890                        }
1891                    }
1892                    XmlAnnotationHandling::ResourceSelector => {
1893                        // Annotation is metadata, translates to ResourceSelector
1894                        builder = builder.with_target(SelectorBuilder::ResourceSelector(
1895                            self.resource_handle.into(),
1896                        ));
1897                        if self.config.debug {
1898                            eprintln!("[STAM fromxml]   builder AnnotateResource: {:?}", builder);
1899                        }
1900                        store.annotate(builder)?;
1901                    }
1902                    XmlAnnotationHandling::TextSelectorBetweenMarkers => {
1903                        // Annotation is on a text span *between* two marker elements
1904                        if let Some(selector) =
1905                            self.textselector_for_markers(node, doc_num, store, element_config)
1906                        {
1907                            builder = builder.with_target(selector);
1908                            if self.config.debug {
1909                                eprintln!(
1910                                    "[STAM fromxml]   builder TextSelectorBetweenMarkers: {:?}",
1911                                    builder
1912                                );
1913                            }
1914                            store.annotate(builder)?;
1915                            if !element_config.annotatetextprefix.is_empty() || !element_config.annotatetextsuffix.is_empty() {
1916                                self.annotate_textaffixes(node, element_config, inputfile, doc_num, store)?;
1917                            }
1918                        }
1919                    }
1920                    _ => panic!(
1921                        "Invalid annotationhandling: {:?}",
1922                        element_config.annotation
1923                    ),
1924                }
1925            }
1926
1927            // Recursion step
1928            if element_config.stop == Some(false) || element_config.stop.is_none() {
1929                for child in node.children() {
1930                    if child.is_element() {
1931                        self.debugindent.push_str("  ");
1932                        let mut path = path.clone();
1933                        let count = elder_siblings.count(&child);
1934                        path.add(&child, Some(count));
1935                        //eprintln!("DEBUG: count={}, child={:?}, parent={:?}, elder_siblings={:?}", count, child.tag_name(), node.tag_name(), elder_siblings);
1936                        self.extract_element_annotation(child, &path, inputfile, doc_num, store)?;
1937                        self.debugindent.pop();
1938                        self.debugindent.pop();
1939                    }
1940                }
1941            }
1942        } else {
1943            eprintln!(
1944                "[STAM fromxml]{} WARNING: no match, skipping annotation extraction for element {}",
1945                self.debugindent,
1946                path
1947            );
1948        }
1949        Ok(())
1950    }
1951
1952    fn add_annotationdata_to_builder<'input>(&self, iter: impl Iterator<Item = &'a XmlAnnotationDataConfig>,
1953        mut builder: AnnotationBuilder<'a>,
1954        node: Node<'a, 'input>,
1955        begin: Option<usize>,
1956        end: Option<usize>,
1957        resource_id: Option<&str>,
1958        inputfile: Option<&str>,
1959        doc_num: usize,
1960    ) -> Result<AnnotationBuilder<'a>, XmlConversionError> {
1961        for annotationdata in iter {
1962            let mut databuilder = AnnotationDataBuilder::new();
1963            if let Some(template) = &annotationdata.set {
1964                let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1965                let compiled_template = self.template_engine.template(template.as_str());
1966                let dataset = compiled_template.render(&context).to_string().map_err(|e|
1967                        XmlConversionError::TemplateError(
1968                            format!(
1969                                "whilst rendering annotationdata/dataset template '{}' for node '{}'",
1970                                template,
1971                                node.tag_name().name(),
1972                            ),
1973                            Some(e),
1974                        )
1975                    )?;
1976                if !dataset.is_empty() {
1977                    databuilder = databuilder.with_dataset(dataset.into())
1978                }
1979            } else {
1980                databuilder =
1981                    databuilder.with_dataset(self.config.default_set.as_str().into());
1982            }
1983            if let Some(template) = &annotationdata.key {
1984                let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1985                let compiled_template = self.template_engine.template(template.as_str());
1986                match compiled_template.render(&context).to_string().map_err(|e| 
1987                        XmlConversionError::TemplateError(
1988                            format!(
1989                                "whilst rendering annotationdata/key template '{}' for node '{}'",
1990                                template,
1991                                node.tag_name().name(),
1992                            ),
1993                            Some(e),
1994                        )
1995                    )  {
1996                    Ok(key) if !key.is_empty() =>
1997                        databuilder = databuilder.with_key(key.into()) ,
1998                    Ok(_) if !annotationdata.skip_if_missing => {
1999                        return Err(XmlConversionError::TemplateError(
2000                            format!(
2001                                "whilst rendering annotationdata/key template '{}' for node '{}'",
2002                                template,
2003                                node.tag_name().name(),
2004                            ),
2005                            None
2006                        ));
2007                    },
2008                    Err(e) if !annotationdata.skip_if_missing => {
2009                        return Err(e)
2010                    },
2011                    _ => {
2012                        //skip whole databuilder if missing
2013                        continue
2014                    }
2015                }
2016            }
2017            if let Some(value) = &annotationdata.value {
2018                match self.extract_value(value,  node, annotationdata.allow_empty_value, annotationdata.skip_if_missing, annotationdata.valuetype.as_ref().map(|s| s.as_str()), begin, end, resource_id, inputfile, doc_num)? {
2019                    Some(DataValue::List(values)) if annotationdata.multiple => {
2020                        for value in values {
2021                            let mut databuilder_multi = databuilder.clone();
2022                            databuilder_multi = databuilder_multi.with_value(value);
2023                            builder = builder.with_data_builder(databuilder_multi);
2024                        }
2025                    },
2026                    Some(value) => {
2027                        databuilder = databuilder.with_value(value);
2028                    },
2029                    None =>  {
2030                        //skip whole databuilder if missing
2031                        continue
2032                    }
2033                }
2034            }
2035            if !annotationdata.multiple {
2036                builder = builder.with_data_builder(databuilder);
2037            }
2038        }
2039        Ok(builder)
2040    }
2041
2042    /// Annotates textprefix and textsuffix, if applicable
2043    fn annotate_textaffixes<'b>(
2044        &mut self,
2045        node: Node<'a,'b>,
2046        element_config: &XmlElementConfig,
2047        inputfile: Option<&str>,
2048        doc_num: usize,
2049        store: &mut AnnotationStore,
2050    ) -> Result<(), XmlConversionError> {
2051
2052
2053        if !element_config.annotatetextprefix.is_empty() {
2054            let mut builder = AnnotationBuilder::new().with_id(stam::generate_id("textprefix-", ""));
2055            if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), PositionType::TextPrefix)) {
2056                let begin = if let Cursor::BeginAligned(begin) = offset.begin {
2057                        Some(begin)
2058                    } else {
2059                        None
2060                    };
2061                let end = if let Cursor::BeginAligned(end) = offset.end {
2062                        Some(end)
2063                    } else {
2064                        None
2065                    };
2066                builder = self.add_annotationdata_to_builder(element_config.annotatetextprefix.iter(), builder, node.clone(), begin,end, None, inputfile, doc_num)?; //MAYBE TODO: pass resource_id
2067                if let Some(selector) = self.textselector(node, doc_num, PositionType::TextPrefix) {
2068                    builder = builder.with_target(selector);
2069                    if self.config.debug {
2070                        eprintln!("[STAM fromxml]   builder AnnotateText: {:?}", builder);
2071                    }
2072                    store.annotate(builder)?;
2073                } else {
2074                    return Err(XmlConversionError::ConfigError("Failed to create textselector to target textprefix".into()));
2075                }
2076            }
2077        }
2078
2079        if !element_config.annotatetextsuffix.is_empty() {
2080            let mut builder = AnnotationBuilder::new().with_id(stam::generate_id("textsuffix-", ""));
2081            if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), PositionType::TextSuffix)) {
2082                let begin = if let Cursor::BeginAligned(begin) = offset.begin {
2083                        Some(begin)
2084                    } else {
2085                        None
2086                    };
2087                let end = if let Cursor::BeginAligned(end) = offset.end {
2088                        Some(end)
2089                    } else {
2090                        None
2091                    };
2092                builder = self.add_annotationdata_to_builder(element_config.annotatetextsuffix.iter(), builder, node.clone(), begin,end, None, inputfile, doc_num)?; //MAYBE TODO: pass resource_id
2093                if let Some(selector) = self.textselector(node, doc_num, PositionType::TextSuffix) {
2094                    builder = builder.with_target(selector);
2095                    if self.config.debug {
2096                        eprintln!("[STAM fromxml]   builder AnnotateText: {:?}", builder);
2097                    }
2098                    store.annotate(builder)?;
2099                } else {
2100                    return Err(XmlConversionError::ConfigError("Failed to create textselector to target textprefix".into()));
2101                }
2102            }
2103        }
2104        Ok(())
2105    }
2106
2107    /// Extract values, running the templating engine in case of string values
2108    fn extract_value<'b>(&self, value: &'a toml::Value, node: Node<'a,'b>, allow_empty_value: bool, skip_if_missing: bool, valuetype: Option<&str>, begin: Option<usize>, end: Option<usize>, resource_id: Option<&str>, inputfile: Option<&str>, doc_num: usize) -> Result<Option<DataValue>, XmlConversionError>{
2109        match value {
2110            toml::Value::String(template) => {
2111                let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
2112                /*
2113                if self.config.debug() {
2114                    eprintln!(
2115                        "[STAM fromxml]              Context for annotationdata/map template '{}' for node '{}': {:?}",
2116                        template,
2117                        node.tag_name().name(),
2118                        context
2119                    );
2120                }
2121                */
2122                let compiled_template = self.template_engine.template(template.as_str()); //panics if doesn't exist, but that can't happen
2123                match compiled_template.render(&context).to_string().map_err(|e|
2124                        XmlConversionError::TemplateError(
2125                            format!(
2126                                "whilst rendering annotationdata/map template '{}' for node '{}'.{}",
2127                                template,
2128                                node.tag_name().name(),
2129                                if self.config.debug() {
2130                                    format!("\nContext was {:?}.\nVariables are: {:?}", context, self.variables.get(template))
2131                                } else {
2132                                    String::new()
2133                                }
2134                            ),
2135                            Some(e),
2136                        )
2137                    )  {
2138                    Ok(value) => {
2139                        if !value.is_empty() || allow_empty_value {
2140                            string_to_datavalue(value, valuetype).map(|v| Some(v))
2141                        } else {
2142                            //skip
2143                            Ok(None)
2144                        }
2145                    },
2146                    Err(e) if !skip_if_missing => {
2147                        Err(e)
2148                    },
2149                    Err(_) if allow_empty_value => {
2150                        Ok(Some("".into()))
2151                    },
2152                    Err(_) => {
2153                        //skip whole databuilder if missing
2154                        Ok(None)
2155                    }
2156                }
2157            },
2158            toml::Value::Table(map) => {
2159                let mut resultmap: BTreeMap<String,DataValue> = BTreeMap::new();
2160                for (key, value) in map.iter() {
2161                    if let Some(value) = self.extract_value(value,  node, false, true, None, begin, end, resource_id, inputfile, doc_num)? {
2162                        resultmap.insert(key.clone(), value);
2163                    }
2164                }
2165                Ok(Some(resultmap.into()))
2166            },
2167            toml::Value::Array(list) => {
2168                let mut resultlist: Vec<DataValue> = Vec::new();
2169                for value in list.iter() {
2170                    if let Some(value) = self.extract_value(value, node, false, true, None,  begin, end, resource_id, inputfile, doc_num)? {
2171                        resultlist.push(value);
2172                    }
2173                }
2174                Ok(Some(resultlist.into()))
2175            }
2176            toml::Value::Boolean(v) => Ok(Some(DataValue::Bool(*v))),
2177            toml::Value::Float(v) => Ok(Some(DataValue::Float(*v))),
2178            toml::Value::Integer(v) => Ok(Some(DataValue::Int(*v as isize))),
2179            toml::Value::Datetime(_v) => {
2180                todo!("fromxml: Datetime conversion not implemented yet");
2181            }
2182        }
2183    }
2184
2185    /// Extract values for metadata (no associated node), running the templating engine in case of string values
2186    fn extract_value_metadata<'b>(&self, value: &'a toml::Value, context: &upon::Value, allow_empty_value: bool, skip_if_missing: bool, resource_id: Option<&str>) -> Result<Option<DataValue>, XmlConversionError>{
2187        match value {
2188            toml::Value::String(template) => {
2189                let compiled_template = self.template_engine.template(template.as_str()); //panics if doesn't exist, but that can't happen
2190                match compiled_template.render(&context).to_string().map_err(|e|
2191                        XmlConversionError::TemplateError(
2192                            format!(
2193                                "whilst rendering annotationdata/metadata template '{}' for metadata",
2194                                template,
2195                            ),
2196                            Some(e),
2197                        )
2198                    )  {
2199                    Ok(value) => {
2200                        if !value.is_empty() || allow_empty_value {
2201                            Ok(Some(value.into()))
2202                        } else {
2203                            //skip
2204                            Ok(None)
2205                        }
2206                    },
2207                    Err(e) if !skip_if_missing => {
2208                        Err(e)
2209                    },
2210                    Err(_) if allow_empty_value => {
2211                        Ok(Some("".into()))
2212                    },
2213                    Err(_) => {
2214                        //skip whole databuilder if missing
2215                        Ok(None)
2216                    }
2217                }
2218            },
2219            toml::Value::Table(map) => {
2220                let mut resultmap: BTreeMap<String,DataValue> = BTreeMap::new();
2221                for (key, value) in map.iter() {
2222                    if let Some(value) = self.extract_value_metadata(value, context, false, true,  resource_id)? {
2223                        resultmap.insert(key.clone(), value);
2224                    }
2225                }
2226                Ok(Some(resultmap.into()))
2227            },
2228            toml::Value::Array(list) => {
2229                let mut resultlist: Vec<DataValue> = Vec::new();
2230                for value in list.iter() {
2231                    if let Some(value) = self.extract_value_metadata(value, context, false, true, resource_id)? {
2232                        resultlist.push(value);
2233                    }
2234                }
2235                Ok(Some(resultlist.into()))
2236            }
2237            toml::Value::Boolean(v) => Ok(Some(DataValue::Bool(*v))),
2238            toml::Value::Float(v) => Ok(Some(DataValue::Float(*v))),
2239            toml::Value::Integer(v) => Ok(Some(DataValue::Int(*v as isize))),
2240            toml::Value::Datetime(_v) => {
2241                todo!("fromxml: Datetime conversion not implemented yet");
2242            }
2243        }
2244    }
2245
2246    /// Select text corresponding to the element/node and document number
2247    fn textselector<'s>(&'s self, node: Node, doc_num: usize, positiontype: PositionType) -> Option<SelectorBuilder<'s>> {
2248        let res_handle = self.resource_handle.expect("resource must be associated");
2249        if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), positiontype)) {
2250            Some(SelectorBuilder::TextSelector(
2251                BuildItem::Handle(res_handle),
2252                offset.clone(),
2253            ))
2254        } else {
2255            None
2256        }
2257    }
2258
2259    /// Select text between this element/node and the next of the same type
2260    fn textselector_for_markers<'b>(
2261        &self,
2262        node: Node,
2263        doc_num: usize,
2264        store: &AnnotationStore,
2265        element_config: &'b XmlElementConfig,
2266    ) -> Option<SelectorBuilder<'b>> {
2267        let resource = store
2268            .resource(
2269                self.resource_handle
2270                    .expect("resource must have been created"),
2271            )
2272            .expect("resource must exist");
2273        let mut end: Option<usize> = None;
2274        if let Some(markers) = self.markers.get(&element_config.hash()) {
2275            let mut grab = false;
2276            for (d_num, n_id) in markers.iter() {
2277                if grab {
2278                    //this marker is the next one, it's begin position is our desired end position
2279                    end = self.positionmap.get(&(*d_num, *n_id, PositionType::Body)).map(|offset| {
2280                        offset
2281                            .begin
2282                            .try_into()
2283                            .expect("begin cursor must be beginaligned")
2284                    });
2285                    break;
2286                }
2287                if doc_num == *d_num && *n_id == node.id() {
2288                    //current node/marker found, signal grab for the next one
2289                    grab = true;
2290                }
2291            }
2292        };
2293        if end.is_none() {
2294            //no next marker found, find the end
2295            //are we in a restricted scope?
2296            if let Some(scope) = element_config.marker_scope.as_deref() {
2297                if let Some((d_num, n_id)) = self.scopes.get(scope) {
2298                    end = self.positionmap.get(&(*d_num, *n_id, PositionType::Body)).map(|offset| {
2299                        offset
2300                            .end
2301                            .try_into()
2302                            .expect("end cursor must be beginaligned")
2303                    });
2304                } else {
2305                    eprintln!("WARNING: Undefined scope referenced in marker_scope: {}, no matching text with this `scope_id` in this document! Skipping last marker!", scope);
2306                    return None;
2307                }
2308            } else {
2309                //just use end of document instead
2310                end = Some(resource.textlen());
2311            }
2312        }
2313        if let (Some(offset), Some(end)) = (self.positionmap.get(&(doc_num, node.id(), PositionType::Body)), end) {
2314            Some(SelectorBuilder::TextSelector(
2315                BuildItem::Handle(self.resource_handle.unwrap()),
2316                Offset::simple(
2317                    offset
2318                        .begin
2319                        .try_into()
2320                        .expect("begin cursor must be beginaligned"),
2321                    end,
2322                ),
2323            ))
2324        } else {
2325            None
2326        }
2327    }
2328
2329    fn set_global_context(&mut self) {
2330        self.global_context
2331            .insert("context".into(), upon::Value::Map(self.config.context.iter().map(|(k,v)| (k.clone(), map_value(v))).collect()));
2332        self.global_context
2333            .insert("namespaces".into(), self.config.namespaces.clone().into());
2334        self.global_context
2335            .insert("default_set".into(), self.config.default_set.clone().into());
2336    }
2337
2338    fn render_template<'input, 't>(
2339        &self,
2340        template: &'t str,
2341        node: &Node<'a, 'input>,
2342        begin: Option<usize>,
2343        end: Option<usize>,
2344        resource: Option<&str>,
2345        inputfile: Option<&str>,
2346        doc_num: usize,
2347    ) -> Result<Cow<'t, str>, XmlConversionError> {
2348        if template.chars().any(|c| c == '{') {
2349            //value is a template, templating engine probably needed
2350            let compiled_template = self.template_engine.template(template);
2351            let context = self.context_for_node(&node, begin, end, template, resource, inputfile, doc_num);
2352            let result = compiled_template.render(context).to_string()?;
2353            Ok(Cow::Owned(result))
2354        } else {
2355            //value is a literal: templating engine not needed
2356            Ok(Cow::Borrowed(template))
2357        }
2358    }
2359
2360    fn context_for_node<'input>(
2361        &self,
2362        node: &Node<'a, 'input>,
2363        begin: Option<usize>,
2364        end: Option<usize>,
2365        template: &str,
2366        resource: Option<&str>,
2367        inputfile: Option<&str>,
2368        doc_num: usize,
2369    ) -> upon::Value {
2370        let mut context = self.global_context.clone();
2371        let length = if let (Some(begin), Some(end)) = (begin, end) {
2372            Some(end - begin)
2373        } else {
2374            None
2375        };
2376        context.insert("localname".into(), node.tag_name().name().into());
2377        //name with name prefix (if any)
2378        context.insert("name".into(), self.get_node_name_for_template(node).into());
2379        if let Some(namespace) = node.tag_name().namespace() {
2380            //the full namespace
2381            context.insert("namespace".into(), namespace.into());
2382        }
2383
2384        // Offset in the untangled plain text
2385        if let Some(begin) = begin {
2386            context.insert("begin".into(), upon::Value::Integer(begin as i64));
2387        }
2388        if let Some(end) = end {
2389            context.insert("end".into(), upon::Value::Integer(end as i64));
2390        }
2391        if let Some(length) = length {
2392            context.insert("length".into(), upon::Value::Integer(length as i64));
2393        }
2394        if let Some(resource) = resource {
2395            //the resource ID
2396            context.insert("resource".into(), resource.into());
2397        }
2398        if let Some(inputfile) = inputfile {
2399            //the input file
2400            context.insert("inputfile".into(), inputfile.into());
2401        }
2402        //document number (0-indexed), useful in case multiple input documents are cast to a single output text
2403        context.insert("doc_num".into(), upon::Value::Integer(doc_num as i64));
2404
2405        if let Some(vars) = self.variables.get(template) {
2406            for var in vars {
2407                let mut encodedvar = String::new();
2408                if let Some(value) = self.context_for_var(node, var, &mut encodedvar, false) {
2409                    if self.config.debug() {
2410                        eprintln!(
2411                            "[STAM fromxml]              Set context variable for template '{}' for node '{}': {}={:?}   (encodedvar={})",
2412                            template,
2413                            node.tag_name().name(),
2414                            var,
2415                            value,
2416                            encodedvar
2417                        );
2418                    }
2419                    if value != upon::Value::None {
2420                        context.insert(encodedvar, value);
2421                    }
2422                } else if self.config.debug() {
2423                    eprintln!(
2424                        "[STAM fromxml]              Missed context variable for template '{}' for node '{}': {}",
2425                        template,
2426                        node.tag_name().name(),
2427                        var
2428                    );
2429                }
2430            }
2431        }
2432        upon::Value::Map(context)
2433    }
2434
2435    /// Looks up a variable value (from the DOM XML) to be used in for template context
2436    // returns value and stores full the *encoded* variable name in path (this is safe to pass to template)
2437    // return values are temporarily aggregated in multiple if multiple elements are requested, it will be emptied automatically, the caller owns it but doesn't use it itself.
2438    fn context_for_var<'input>(
2439        &self,
2440        node: &Node<'a, 'input>,
2441        var: &str,
2442        path: &mut String,
2443        mut return_all_matches: bool,
2444    ) -> Option<upon::Value> {
2445
2446        //are we the first call by the caller or are we a recursion?
2447        let first = path.is_empty();
2448
2449        let var = if var.starts_with("?.$$") {
2450            if first {
2451                path.push_str("?.ELEMENTS_");
2452                return_all_matches = true;
2453                if self.config.debug {
2454                    eprintln!("[STAM fromxml]              will return all matches for {}", var);
2455                }
2456            };
2457            &var[4..]
2458        } else if var.starts_with("?.$") {
2459            if first {
2460                path.push_str("?.ELEMENT_");
2461            };
2462            &var[3..]
2463        } else if var.starts_with("$$") {
2464            if first {
2465                path.push_str("ELEMENTS_");
2466                return_all_matches = true;
2467                if self.config.debug {
2468                    eprintln!("[STAM fromxml]              will return all matches for {}", var);
2469                }
2470            };
2471            &var[2..]
2472        } else if var.starts_with("$") {
2473            if first {
2474                path.push_str("ELEMENT_");
2475            };
2476            &var[1..]
2477        } else if var.starts_with("?.@") {
2478            if first {
2479                path.push_str("?.");
2480            };
2481            &var[2..]
2482        } else {
2483            var
2484        };
2485
2486        if !first && !var.is_empty() && !path.ends_with("ELEMENT_") && !path.ends_with("ELEMENTS_"){
2487            path.push_str("_IN_");
2488        }
2489
2490        //get the first component of the variable
2491        let (component, remainder) = var.split_once("/").unwrap_or((var,""));
2492        //eprintln!("DEBUG: component={}, remainder={}, node={}, return_all_matches={}", component, remainder, node.tag_name().name(), return_all_matches);
2493        if component.is_empty() {
2494            if first && !remainder.is_empty() {
2495                //we're asked to start at the root node
2496                let mut n = node.clone();
2497                //find the root node
2498                while let Some(parentnode) = n.parent_element() {
2499                    n = parentnode;
2500                }
2501                //recurse from root node
2502                let (rootcomponent, remainder) = remainder.split_once("/").unwrap_or((remainder,""));
2503                let (prefix, localname)  = if let Some(pos) = rootcomponent.find(":") {
2504                    (Some(&rootcomponent[0..pos]),  &rootcomponent[pos+1..])
2505                } else {
2506                    (None, rootcomponent)
2507                };
2508                //test if root name corresponds with what we expected
2509                if localname != n.tag_name().name() && localname != "*" {
2510                    None
2511                } else {
2512                    if let Some(prefix) = prefix {
2513                        path.push_str(prefix);
2514                        path.push_str("__");
2515                    }
2516                    path.push_str(localname);
2517                    self.context_for_var(&n, remainder, path, return_all_matches)
2518                }
2519            } else {
2520                //an empty component is the stop condition , this function is called recursively, stripping one
2521                //component at a time until nothing is left, we then take the text of that final node:
2522                Some(recursive_text(node).into())
2523            }
2524        } else if component.starts_with("@"){
2525            if let Some(pos) = component.find(":") {
2526                let prefix = &component[1..pos];
2527                if let Some(ns) = self.config.namespaces.get(prefix) {
2528                    let var = &component[pos+1..];
2529                    path.push_str("ATTRIB_");
2530                    path.push_str(prefix);
2531                    path.push_str("__");
2532                    path.push_str(var);
2533                    Some(
2534                        node.attribute((ns.as_str(),var)).into()
2535                    )
2536                } else {
2537                    None
2538                }
2539            } else {
2540                let var = &component[1..];
2541                path.push_str("ATTRIB_");
2542                path.push_str(var);
2543                Some(
2544                    node.attribute(var).into()
2545                )
2546            }
2547        } else if component == ".." {
2548            if let Some(parentnode) = node.parent_element().as_ref() {
2549                //recurse with parent node
2550                path.push_str("PARENT");
2551                self.context_for_var(parentnode, remainder, path, return_all_matches)
2552            } else {
2553                None
2554            }
2555        } else if component == "." {
2556            path.push_str("THIS");
2557            if !remainder.is_empty() {
2558                //a . is meaningless if not the final component
2559                self.context_for_var(node, remainder, path, return_all_matches)
2560            } else {
2561                Some(recursive_text(node).into())
2562            }
2563        } else {
2564            let (prefix, localname)  = if let Some(pos) = component.find(":") {
2565                (Some(&component[0..pos]),  &component[pos+1..])
2566            } else {
2567                (None, component)
2568            };
2569            let localname_with_condition = localname;
2570            let (localname, condition_str, condition) = self.extract_condition(localname_with_condition); //extract X-Path like conditions [@attrib="value"]  (very limited!)
2571            //eprintln!("DEBUG: looking for {} (prefix={:?},localname={}, condition={:?}) in {:?}", localname_with_condition,  prefix, localname, condition, node.tag_name());
2572            let mut multiple_value_buffer: Vec<upon::Value> = Vec::new(); //only used when multiple == true
2573            let mut final_path: String = String::new(); //only used when multiple == true
2574            for child in node.children() {
2575                if child.is_element() {
2576                    let namedata = child.tag_name();
2577                    let mut child_matches = if let Some(namespace) = namedata.namespace() {
2578                        if let Some(foundprefix) = self.prefixes.get(namespace) {
2579                            Some(foundprefix.as_str()) == prefix && localname == namedata.name()
2580                        } else {
2581                            false
2582                        }
2583                    } else {
2584                        namedata.name() == localname
2585                    };
2586                    if child_matches {
2587                        //MAYBE TODO: move to separate funtion
2588                        if let Some((attribname, negate, attribvalue)) = condition {
2589                            //test condition: falsify child_matches
2590                            if let Some(pos) = attribname.find(":") {
2591                                let prefix = &attribname[0..pos];
2592                                if let Some(ns) = self.config.namespaces.get(prefix) {
2593                                    let attribname = &attribname[pos+1..];
2594                                    if let Some(value) = child.attribute((ns.as_str(),attribname)) {
2595                                        if !negate && attribvalue != Some(value) {
2596                                            child_matches = false;
2597                                        } else if negate && attribvalue == Some(value) {
2598                                            child_matches = false;
2599                                        }
2600                                    } else {
2601                                        child_matches = false;
2602                                    }
2603                                } else {
2604                                    child_matches = false;
2605                                }
2606                            } else {
2607                                if let Some(value) = child.attribute(attribname) {
2608                                    if !negate && attribvalue != Some(value) {
2609                                        child_matches = false;
2610                                    } else if negate && attribvalue == Some(value) {
2611                                        child_matches = false;
2612                                    }
2613                                } else {
2614                                    child_matches = false;
2615                                }
2616                            }
2617                        }
2618                        if !child_matches && self.config.debug {
2619                            eprintln!("[STAM fromxml] candidate node does not meet condition: {}", localname_with_condition);
2620                        }
2621                        //end condition test
2622                    }
2623                    if child_matches {
2624                        let prevpathlen = path.len();
2625                        //update path
2626                        if let Some(prefix) = prefix {
2627                            path.push_str(prefix);
2628                            path.push_str("__");
2629                        }
2630                        path.push_str(localname);
2631                        if condition.is_some() {
2632                            //simply encode the condition as a hash (non-decodable but that's okay)
2633                            let mut hasher = DefaultHasher::new();
2634                            condition_str.hash(&mut hasher);
2635                            let h = hasher.finish();
2636                            path.push_str(&format!("_COND{}_", h));
2637                        }
2638                        if let Some(value) = self.context_for_var(&child, remainder, path, return_all_matches) {
2639                            //success
2640                            if return_all_matches {
2641                                if let upon::Value::List(v) = value {
2642                                    multiple_value_buffer.extend(v.into_iter());
2643                                } else {
2644                                    multiple_value_buffer.push(value);
2645                                }
2646                                if final_path.is_empty() {
2647                                    final_path = path.clone();
2648                                }
2649                                //do not return yet, there may be more!
2650                            } else {
2651                                //normal behaviour, get first match
2652                                return Some(value);
2653                            }
2654                        }
2655                        //child didn't match (or we want multiple matches), truncate path again and continue search (a later child may match again!)
2656                        path.truncate(prevpathlen);
2657                    }
2658                }
2659            }
2660            if !multiple_value_buffer.is_empty() {
2661                //we found multiple values, return them
2662                if self.config.debug {
2663                    eprintln!("[STAM fromxml]              returning multiple matches of {} as list", var);
2664                }
2665                //we also return the path of the match
2666                *path = final_path;
2667                Some(multiple_value_buffer.into())
2668            } else {
2669                //no match found for this variable
2670                if self.config.debug {
2671                    eprintln!("[STAM fromxml]              returning with no match found for {} in {}", var, node.tag_name().name());
2672                }
2673                None
2674            }
2675        }
2676    }
2677
2678    fn extract_condition<'b>(&self, localname: &'b str) -> (&'b str, &'b str, Option<(&'b str, bool, Option<&'b str>)>) { //(localname, condition, Option<(attrib, negation, attribvalue)>)
2679        //simple conditional statement
2680        if localname.ends_with("]") {
2681            if let Some(pos) = localname.find("[") {
2682                let condition = &localname[pos+1..localname.len()-1];
2683                let (mut attrib, negation, attribvalue) = if let Some(pos) = condition.find("=") {
2684                     let attrib = condition[0..pos].trim();
2685                     let value = condition[pos+1..].trim();
2686                     let value = &value[1..value.len() - 1]; //strips the literal quotes (") for the value
2687                     if attrib.ends_with('!') {
2688                        //negation (!= operator)
2689                        (attrib[..attrib.len() - 1].trim(), true, Some(value))
2690                     } else {
2691                        (attrib.trim(), false, Some(value))
2692                     }
2693                } else {
2694                    (condition, false, None)
2695                };
2696                if attrib.starts_with('@') {
2697                    //this should actually be mandatory and already checked during template precompilation
2698                    attrib = &attrib[1..];
2699                }
2700                return (&localname[..pos], condition, Some((attrib,  negation,attribvalue )) );
2701            }
2702        }
2703        (localname, "", None)
2704    }
2705
2706
2707    fn get_node_name_for_template<'b>(&self, node: &'b Node) -> Cow<'b,str> {
2708        let extended_name = node.tag_name();
2709        match (extended_name.namespace(), extended_name.name()) {
2710            (Some(namespace), tagname) => {
2711                if let Some(prefix) = self.prefixes.get(namespace) {
2712                    Cow::Owned(format!("{}__{}", prefix, tagname))
2713                } else {
2714                    Cow::Borrowed(tagname)
2715                }
2716            }
2717            (None, tagname) => Cow::Borrowed(tagname),
2718        }
2719    }
2720
2721    fn get_node_name_for_xpath<'b>(&self, node: &'b Node) -> Cow<'b,str> {
2722        let extended_name = node.tag_name();
2723        match (extended_name.namespace(), extended_name.name()) {
2724            (Some(namespace), tagname) => {
2725                if let Some(prefix) = self.prefixes.get(namespace) {
2726                    Cow::Owned(format!("{}:{}", prefix, tagname))
2727                } else {
2728                    Cow::Borrowed(tagname)
2729                }
2730            }
2731            (None, tagname) => Cow::Borrowed(tagname),
2732        }
2733    }
2734
2735
2736    fn precompile(&mut self, template: &'a str) -> Cow<'a,str> {
2737        let mut replacement = String::new();
2738        let mut variables: BTreeSet<&'a str> = BTreeSet::new();
2739        let mut begin = 0;
2740        let mut end = 0;
2741        for i  in 0..template.len() {
2742            let slice = &template[i..];
2743            if slice.starts_with("{{") || slice.starts_with("{%") {
2744                begin = i;
2745            } else if slice.starts_with("}}") || slice.starts_with("%}") {
2746                if end < begin+2 {
2747                    replacement.push_str(&template[end..begin+2]);
2748                }
2749                let inner = &template[begin+2..i]; //the part without the {{  }}
2750                replacement.push_str(&self.precompile_inblock(inner, &mut variables));
2751                end = i;
2752            }
2753        }
2754        if end > 0 {
2755            replacement.push_str(&template[end..]);
2756        }
2757        self.variables.insert(template.into(), variables);
2758        //eprintln!("DEBUG: precompile({}) -> {}", template, replacement);
2759
2760        if !replacement.is_empty() {
2761            Cow::Owned(replacement)
2762        } else {
2763            Cow::Borrowed(template)
2764        }
2765    }
2766
2767    fn precompile_inblock<'s>(&self, s: &'s str, vars: &mut BTreeSet<&'s str>) -> Cow<'s,str> {
2768        let mut quoted = false;
2769        let mut var = false;
2770        let mut begin = 0;
2771        let mut end = 0;
2772        let mut replacement = String::new();
2773        let mut in_condition = false;
2774        for (i,c) in s.char_indices() {
2775            if in_condition && c != ']' {
2776                continue;
2777            }
2778            if c == '"' {
2779                quoted = !quoted;
2780            } else if !quoted {
2781                if !var && (c == '@' || c == '$') {
2782                    //token is an XML variable name, its syntax needs some changes before it can be used in the templating engine
2783                    var = true;
2784                    begin = i;
2785                } else if var && c == '[' {
2786                    in_condition = true;
2787                } else if var && in_condition && c == ']' {
2788                    //end of condition
2789                    in_condition = false;
2790                } else if var && in_condition  {
2791                    //in condition
2792                    continue;
2793                } else if var && (!c.is_alphanumeric() && c != '$' && c != '.' && c != '/' && c != '_' && c != ':' && c != '@') {
2794                    //end of variable (including condition if applicable)
2795                    if end < begin {
2796                        replacement.push_str(&s[end..begin]);
2797                    }
2798                    let varname = &s[begin..i];
2799                    vars.insert(varname);
2800                    let replacement_var = self.precompile_name(varname);
2801                    replacement += &replacement_var;
2802                    end = i;
2803                    var = false;
2804                }
2805            }
2806        }
2807        if end > 0 {
2808            replacement.push_str(&s[end..]);
2809        }
2810        if var {
2811            //don't forget last one
2812            let varname = &s[begin..];
2813            vars.insert(varname);
2814            let replacement_var = self.precompile_name(varname);
2815            replacement += &replacement_var;
2816        }
2817        if !replacement.is_empty() {
2818            //eprintln!("DEBUG: precompile_inblock({}) -> {}", s, replacement);
2819            Cow::Owned(replacement)
2820        } else {
2821            Cow::Borrowed(s)
2822        }
2823    }
2824
2825    /// upon's templating syntax doesn't support some of the characters we use in names, this function substitutes them for more verbose equivalents
2826    fn precompile_name(&self, s: &str) -> String {
2827        let mut replacement = String::new();
2828        let mut begincondition = None;
2829        let mut skip = 0;
2830        for (i,c) in s.char_indices() {
2831            if begincondition.is_some() && c != ']' {
2832                continue;
2833            } else if skip > 0 {
2834                skip -= 1;
2835                continue;
2836            }
2837            if c == '$' {
2838                let slice = &s[i..];
2839                if slice.starts_with("$$..") {
2840                    replacement.push_str("ELEMENTS_PARENT");
2841                    skip = 3;
2842                } else if slice.starts_with("$$.") {
2843                    replacement.push_str("ELEMENTS_THIS");
2844                    skip = 2;
2845                } else if slice.starts_with("$$/") {
2846                    replacement.push_str("ELEMENTS_");
2847                    skip = 2;
2848                } else if slice.starts_with("$$") {
2849                    replacement.push_str("ELEMENTS_");
2850                    skip = 1;
2851                } else if slice.starts_with("$..") {
2852                    replacement.push_str("ELEMENT_PARENT");
2853                    skip = 2;
2854                } else if slice.starts_with("$.") {
2855                    replacement.push_str("ELEMENT_THIS");
2856                    skip = 1;
2857                } else if slice.starts_with("$/") {
2858                    replacement.push_str("ELEMENT_");
2859                    skip = 1;
2860                } else {
2861                    replacement.push_str("ELEMENT_");
2862                }
2863            } else if c == '@' {
2864                replacement.push_str("ATTRIB_");
2865            } else if c == '/' {
2866                replacement.push_str("_IN_");
2867            } else if c == ':' {
2868                replacement.push_str("__");
2869            } else if c == '[' {
2870                begincondition = Some(i+1);
2871            } else if c == ']' {
2872                //conditions are just stored as hashes
2873                if let Some(begin) = begincondition {
2874                    let mut hasher = DefaultHasher::new();
2875                    let _ = &s[begin..i].hash(&mut hasher);
2876                    let h = hasher.finish();
2877                    replacement.push_str(&format!("_COND{}_", h));
2878                }
2879                begincondition = None;
2880            } else {
2881                replacement.push(c);
2882            }
2883        }
2884        //eprintln!("DEBUG: precompile_name({}) -> {}", s, replacement);
2885        replacement
2886    }
2887
    /// Creates one annotation for every metadata entry in the configuration (`self.config.metadata`)
    /// and adds it to `store`.
    ///
    /// For each entry, the optional `id` template and the `set`/`key` templates of its
    /// annotationdata are rendered against a copy of the global context, extended with a
    /// `resource` variable holding the resource ID when one is available. The resulting
    /// annotation targets the resource held in `self.resource_handle`.
    ///
    /// # Errors
    ///
    /// Returns `XmlConversionError::TemplateError` when an id or dataset template fails to render,
    /// or when a key template fails to render or renders to an empty string while
    /// `skip_if_missing` is not set. Errors from `extract_value_metadata()` and from
    /// `store.annotate()` are propagated as well.
    ///
    /// # Panics
    ///
    /// Panics if the resource handle cannot be resolved in the store, if a `TextSelector`
    /// annotation is requested while there is no resource handle, or if the configured annotation
    /// handling is anything other than `TextSelector`, `ResourceSelector`, `None` or `Unspecified`.
    fn add_metadata(&self, store: &mut AnnotationStore) -> Result<(), XmlConversionError> {
        for metadata in self.config.metadata.iter() {
            let mut builder = AnnotationBuilder::new();

            // Public ID of the resource (if any), exposed to the templates as `resource`
            let resource_id = if let Some(resource_handle) = self.resource_handle {
                store.resource(resource_handle).unwrap().id()
            } else {
                None
            };

            // Each metadata entry gets its own copy of the global context
            let mut context = self.global_context.clone();
            if let Some(resource_id) = resource_id {
                context.insert("resource".into(), resource_id.into());
            }

            // Render the annotation ID; an empty result means no explicit ID is assigned
            if let Some(template) = &metadata.id {
                let compiled_template = self.template_engine.template(template.as_str());
                let id = compiled_template.render(&context).to_string().map_err(|e| 
                        XmlConversionError::TemplateError(
                            format!(
                                "whilst rendering metadata id template '{}'",
                                template,
                            ),
                            Some(e),
                        )
                    )?;
                if !id.is_empty() {
                    builder = builder.with_id(id);
                }
            }

            for annotationdata in metadata.annotationdata.iter() {
                let mut databuilder = AnnotationDataBuilder::new();
                // Dataset: rendered from the `set` template if given, otherwise the configured default set
                if let Some(template) = &annotationdata.set {
                    let compiled_template = self.template_engine.template(template.as_str());
                    let dataset = compiled_template.render(&context).to_string().map_err(|e| 
                            XmlConversionError::TemplateError(
                                format!(
                                    "whilst rendering annotationdata/dataset template '{}' for metadata",
                                    template,
                                ),
                                Some(e),
                            )
                        )?;
                    if !dataset.is_empty() {
                        databuilder = databuilder.with_dataset(dataset.into())
                    }
                } else {
                    databuilder =
                        databuilder.with_dataset(self.config.default_set.as_str().into());
                }
                // Key: an empty or failed rendering is an error, unless skip_if_missing is set,
                // in which case this annotationdata item is dropped silently
                if let Some(template) = &annotationdata.key {
                    let compiled_template = self.template_engine.template(template.as_str());
                    match compiled_template.render(&context).to_string().map_err(|e| 
                            XmlConversionError::TemplateError(
                                format!(
                                    "whilst rendering annotationdata/key template '{}' for metadata",
                                    template,
                                ),
                                Some(e),
                            )
                        )  {
                        Ok(key) if !key.is_empty() =>
                            databuilder = databuilder.with_key(key.into()) ,
                        Ok(_) if !annotationdata.skip_if_missing => {
                            return Err(XmlConversionError::TemplateError(
                                format!(
                                    "whilst rendering annotationdata/key template '{}' metadata",
                                    template,
                                ),
                                None
                            ));
                        },
                        Err(e) if !annotationdata.skip_if_missing => {
                            return Err(e)
                        },
                        _ => {
                            //skip whole databuilder if missing
                            continue
                        }
                    }
                }
                // Value: extract_value_metadata() returns None when the item has to be skipped
                if let Some(value) = &annotationdata.value {
                    match self.extract_value_metadata(value, &upon::Value::Map(context.clone()), annotationdata.allow_empty_value, annotationdata.skip_if_missing,  resource_id.as_deref())? {
                        Some(value) => {
                            databuilder = databuilder.with_value(value);
                        },
                        None =>  {
                            //skip whole databuilder if missing
                            continue
                        }
                    }
                }
                builder = builder.with_data_builder(databuilder);
            }



            // Finish the builder and add the actual annotation to the store, according to its element handling
            match metadata.annotation {
                XmlAnnotationHandling::TextSelector => {
                    // Annotation is on text, translates to TextSelector
                    builder = builder.with_target(SelectorBuilder::TextSelector(BuildItem::Handle(self.resource_handle.expect("resource must have handle")), Offset::whole()));
                    if self.config.debug {
                        eprintln!("[STAM fromxml]   builder AnnotateText: {:?}", builder);
                    }
                    store.annotate(builder)?;
                }
                XmlAnnotationHandling::ResourceSelector  | XmlAnnotationHandling::None | XmlAnnotationHandling::Unspecified => {
                    // Annotation is metadata (default), translates to ResourceSelector
                    builder = builder.with_target(SelectorBuilder::ResourceSelector(
                        self.resource_handle.into(),
                    ));
                    if self.config.debug {
                        eprintln!("[STAM fromxml]   builder AnnotateResource: {:?}", builder);
                    }
                    store.annotate(builder)?;
                }
                // any other annotation handling is not meaningful for metadata
                _ => panic!(
                    "Invalid annotationhandling for metadata: {:?}",
                    metadata.annotation
                ),
            }
        }
        Ok(())
    }
3014}
3015
3016
3017
3018/// Get recursive text without any elements
3019fn recursive_text(node: &Node) -> String {
3020    let mut s = String::new();
3021    for child in node.children() {
3022        if child.is_text() {
3023            s += child.text().expect("should have text");
3024        } else if child.is_element() {
3025            s += &recursive_text(&child);
3026        }
3027    }
3028    s
3029}
3030
3031// Filters
/// Template filter: uppercases the first character of `s` and leaves the rest untouched.
fn filter_capitalize(s: &str) -> String {
    let mut capitalized = String::with_capacity(s.len());
    let mut remaining = s.chars();
    if let Some(first) = remaining.next() {
        // to_uppercase() may yield more than one character (e.g. for 'ß')
        capitalized.extend(first.to_uppercase());
        capitalized.push_str(remaining.as_str());
    }
    capitalized
}
3043
3044fn filter_gt(a: &upon::Value, b: &upon::Value) -> bool {
3045    match (a, b) {
3046        (upon::Value::Integer(a), upon::Value::Integer(b)) => *a > *b,
3047        (upon::Value::Float(a), upon::Value::Float(b)) => *a > *b,
3048        (upon::Value::String(a), upon::Value::String(b)) => *a > *b,
3049        _ => false,
3050    }
3051}
3052
3053fn filter_lt(a: &upon::Value, b: &upon::Value) -> bool {
3054    match (a, b) {
3055        (upon::Value::Integer(a), upon::Value::Integer(b)) => *a < *b,
3056        (upon::Value::Float(a), upon::Value::Float(b)) => *a < *b,
3057        (upon::Value::String(a), upon::Value::String(b)) => *a < *b,
3058        _ => false,
3059    }
3060}
3061
3062fn filter_gte(a: &upon::Value, b: &upon::Value) -> bool {
3063    match (a, b) {
3064        (upon::Value::Integer(a), upon::Value::Integer(b)) => *a >= *b,
3065        (upon::Value::Float(a), upon::Value::Float(b)) => *a >= *b,
3066        (upon::Value::String(a), upon::Value::String(b)) => *a >= *b,
3067        _ => false,
3068    }
3069}
3070
3071fn filter_lte(a: &upon::Value, b: &upon::Value) -> bool {
3072    match (a, b) {
3073        (upon::Value::Integer(a), upon::Value::Integer(b)) => *a <= *b,
3074        (upon::Value::Float(a), upon::Value::Float(b)) => *a <= *b,
3075        (upon::Value::String(a), upon::Value::String(b)) => *a <= *b,
3076        _ => false,
3077    }
3078}
3079
3080fn filter_add(a: &upon::Value, b: &upon::Value) -> upon::Value {
3081    match (a, b) {
3082        (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a + b),
3083        (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a + b),
3084        (upon::Value::String(a), upon::Value::String(b)) => upon::Value::String(a.clone() + b),
3085        _ => upon::Value::None,
3086    }
3087}
3088
3089fn filter_sub(a: &upon::Value, b: &upon::Value) -> upon::Value {
3090    match (a, b) {
3091        (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a - b),
3092        (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a - b),
3093        _ => upon::Value::None,
3094    }
3095}
3096
3097fn filter_mul(a: &upon::Value, b: &upon::Value) -> upon::Value {
3098    match (a, b) {
3099        (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a * b),
3100        (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a * b),
3101        _ => upon::Value::None,
3102    }
3103}
3104
3105fn filter_div(a: &upon::Value, b: &upon::Value) -> upon::Value {
3106    match (a, b) {
3107        (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a / b),
3108        (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a / b),
3109        _ => upon::Value::None,
3110    }
3111}
3112
3113
3114/// Map value between toml and upon. This makes a clone.
3115fn map_value(value: &toml::Value) -> upon::Value {
3116    match value {
3117        toml::Value::String(s) => upon::Value::String(s.clone()),
3118        toml::Value::Integer(i) => upon::Value::Integer(*i),
3119        toml::Value::Float(i) => upon::Value::Float(*i),
3120        toml::Value::Boolean(v) => upon::Value::Bool(*v),
3121        toml::Value::Datetime(s) => upon::Value::String(s.to_string()),
3122        toml::Value::Array(v) => upon::Value::List(v.iter().map(|i| map_value(i)).collect()),
3123        toml::Value::Table(v) => upon::Value::Map(v.iter().map(|(k,i)| (k.clone(),map_value(i))).collect()),
3124    }
3125}
3126
3127/// Parse a string that is a result from the template renderer to a DataValue again
3128#[inline]
3129fn string_to_datavalue(value: String, valuetype: Option<&str>) -> Result<DataValue,XmlConversionError> {
3130    match valuetype {
3131        Some("str") | Some("string")  => Ok(DataValue::String(value)),
3132        Some("int") => {
3133            if let Ok(value) = value.parse::<isize>() {
3134                Ok(DataValue::Int(value))
3135            } else {
3136                Err(XmlConversionError::TemplateError(format!("Unable to interpret value as integer: {}", value), None))
3137            }
3138        },
3139        Some("float") => {
3140            if let Ok(value) = value.parse::<f64>() {
3141                Ok(DataValue::Float(value))
3142            } else {
3143                Err(XmlConversionError::TemplateError(format!("Unable to interpret value as integer: {}", value), None))
3144            }
3145        },
3146        Some("bool") => match value.as_str() {
3147            "yes" | "true" | "enabled" | "on" | "1" | "active"  => Ok(DataValue::Bool(true)),
3148            _ => Ok(DataValue::Bool(false))
3149        },
3150        Some(x) => {
3151                Err(XmlConversionError::TemplateError(format!("Invalid valuetype: {}", x), None))
3152        }
3153        None => {
3154            //automatically determine type
3155            if let Ok(value) =  value.parse::<isize>() {
3156                Ok(DataValue::Int(value))
3157            } else if let Ok(value) =  value.parse::<f64>() {
3158                Ok(DataValue::Float(value))
3159            } else if value.starts_with("(list) [ ") && value.ends_with(" ]") {
3160                //deserialize lists again
3161                if let Ok(serde_json::Value::Array(values)) = serde_json::from_str(&value[6..]) {
3162                    Ok(DataValue::List(values.into_iter().map(|v| {
3163                        match v {
3164                            serde_json::Value::String(s) => DataValue::String(s),
3165                            serde_json::Value::Number(n) => if let Some(n) = n.as_i64() {
3166                                DataValue::Int(n as isize)
3167                            } else if let Some(n) = n.as_f64() {
3168                                DataValue::Float(n)
3169                            } else {
3170                                unreachable!("number should always be either int or float")
3171                            },
3172                            serde_json::Value::Bool(b) => DataValue::Bool(b),
3173                            _ => DataValue::Null, //nested arrays and maps are NOT supported here!
3174                        }
3175                    }).collect()))
3176                } else {
3177                    Err(XmlConversionError::TemplateError(format!("Unable to deserialize list value: {}", value), None))
3178                }
3179            } else {
3180                Ok(value.into())
3181            }
3182        }
3183    }
3184}
3185
3186fn string_to_templatevalue(value: String) -> upon::Value {
3187    if let Ok(value) =  value.parse::<i64>() {
3188        upon::Value::Integer(value)
3189    } else if let Ok(value) =  value.parse::<f64>() {
3190        upon::Value::Float(value)
3191    } else {
3192        upon::Value::String(value)
3193    }
3194}
3195
3196/// Custom formatter for templating that can also handle lists (the default one in upon can't)
3197/// Lists will be output JSON-style prepended by the marker text "(list) ", this allows deserialisers to turn it into a list again
3198fn value_formatter(f: &mut upon::fmt::Formatter<'_>, value: &upon::Value) -> upon::fmt::Result {
3199    match value {
3200        upon::Value::List(vs) => {
3201            f.write_str("(list) [ ")?;
3202            for (i, v) in vs.iter().enumerate() {
3203                if i > 0 {
3204                    f.write_str(", ")?;
3205                }
3206                if let upon::Value::String(s) = v {
3207                    write!(f, "\"{}\"", s.replace("\"","\\\"").replace("\n"," ").split_whitespace().collect::<Vec<_>>().join(" "))?;
3208                } else {
3209                    upon::fmt::default(f, v)?;
3210                    f.write_char('"')?;
3211                }
3212            }
3213            f.write_str(" ]")?;
3214        }
3215        v => upon::fmt::default(f, v)?, // fallback to default formatter
3216    };
3217    Ok(())
3218}
3219
#[derive(Clone,Debug,Deserialize)]
/// An external command that acts as a filter on template values: `run()` spawns `command`
/// with `args`, writes the input value to its standard input and returns what it wrote to
/// standard output.
struct ExternalFilter {
    /// The name of the filter
    name: String,

    /// The command to run.
    command: String,

    /// The arguments to pass to the command, you can use "{{ value }}" or `$value` to represent the input value if needed. It will also be passed to stdin. No escaping needed, it is not mediated by a shell.
    args: Vec<String>
}
3231
3232impl ExternalFilter {
3233    //TODO: panic may be too strict in here:
3234    fn run(&self, input_value: &upon::Value) -> upon::Value {
3235        let process = Command::new(self.command.as_str()).args(
3236            //args are passed directly, not mediated via shell, so no escaping necessary
3237            self.args.iter().map(|x| if x == "{{value}}" || x == "{{ value }}" || x == "$value" {
3238                match input_value {
3239                    upon::Value::String(s) => s.clone(),
3240                    upon::Value::Integer(d) => format!("{}",d),
3241                    upon::Value::Float(d) => format!("{}",d),
3242                    upon::Value::Bool(d) => format!("{}",d),
3243                    upon::Value::None => String::new(),
3244                    _ => panic!("Lists and maps are not supported to be passed as parameter to  external filters yet!"), 
3245                }
3246            } else {
3247                x.clone() //too much cloning, but Cow didn't work here because it is coerced into OsStr later
3248            })
3249        ).stdin(Stdio::piped()).stdout(Stdio::piped()).spawn();
3250
3251
3252        if let Ok(mut process) = process {
3253            {
3254                let mut outstdin = process.stdin.take().expect("unable to open stdin for external filter");
3255                let mut writer = BufWriter::new(&mut outstdin);
3256                match input_value {
3257                    upon::Value::String(s) => writer.write(s.as_bytes()),
3258                    upon::Value::Integer(d) => writer.write(format!("{}",d).as_bytes()),
3259                    upon::Value::Float(d) => writer.write(format!("{}",d).as_bytes()),
3260                    upon::Value::Bool(d) => writer.write(format!("{}",d).as_bytes()),
3261                    upon::Value::None => writer.write(&[]),
3262                    _ => panic!("Lists and maps are not supported to be passed as input to external filters yet!"),
3263                }.expect("Writing to stdin for external filter failed!");
3264                //block ensures writer and outputsdin are dropped prior to waiting for output
3265            }
3266            let output = process.wait_with_output().expect("External filter wasn't running");
3267            if !output.status.success() {
3268                panic!("External filter {} failed ({:?})", self.name, output.status.code());
3269            }
3270            if let Ok(s) = String::from_utf8(output.stdout) {
3271                return string_to_templatevalue(s);
3272            } else {
3273                panic!("External filter {} produced invalid UTF-8!", self.name);
3274            }
3275        }
3276        panic!("External filter {} failed!", self.name);
3277    }
3278}
3279
3280#[cfg(test)]
3281mod tests {
3282    use super::*;
3283    //use crate::info::info;
3284
3285    const XMLSMALLEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3286<head><title>test</title></head><body><h1>TEST</h1><p xml:id="p1" n="001">This  is a <em xml:id="emphasis" style="color:green">test</em>.</p></body></html>"#;
3287
3288    const XMLEXAMPLE: &'static str = r#"<!DOCTYPE entities[<!ENTITY nbsp "&#xA0;">]>
3289<html xmlns="http://www.w3.org/1999/xhtml" xmlns:my="http://example.com">
3290<head>
3291    <title>Test</title>
3292    <meta name="author" content="proycon" />
3293</head>
3294<body>
3295    <h1>Header</h1>
3296
3297    <p xml:id="par1">
3298        <span xml:id="sen1">This is a sentence.</span>
3299        <span xml:id="sen2">This is the second&nbsp;sentence.</span>
3300    </p>
3301    <p xml:id="par2">
3302        <strong>This</strong> is    the <em>second</em> paragraph.
3303            It has a <strong>bold</strong> word and one in <em>italics</em>.<br/>
3304        Let's highlight stress in the following word: <span my:stress="secondary">re</span>pu<span my:stress="primary">ta</span>tion.
3305    </p>
3306    <p xml:space="preserve"><![CDATA[This    third
3307paragraph consists
3308of CDATA and is configured to preserve whitespace, and weird &entities; ]]></p>
3309
3310    <h2>Subsection</h2>
3311
3312    <p>
3313    Have some fruits:<br/>
3314    <ul xml:id="list1" class="fruits">
3315        <li xml:id="fruit1">apple</li>
3316        <li xml:id="fruit2">banana</li>
3317        <li xml:id="fruit3">melon</li>
3318    </ul>
3319    </p>
3320
3321    Some lingering text outside of any confines...
3322</body>
3323</html>"#;
3324
3325    const XMLEXAMPLE_TEXTOUTPUT: &'static str = "Header\n\nThis is a sentence. This is the second sentence.\n\nThis is the second paragraph. It has a bold word and one in italics.\nLet's highlight stress in the following word: reputation.\n\nThis    third\nparagraph consists\nof CDATA and is configured to preserve whitespace, and weird &entities; \nSubsection\n\nHave some fruits:\n* apple\n* banana\n* melon\n\nSome lingering text outside of any confines...";
3326
3327    //fake example (not real HTML, testing TEI-like space attribute with complex template)
3328    const XMLTEISPACE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3329<body><space dim="vertical" unit="lines" quantity="3" /></body></html>"#;
3330
3331    const CONF: &'static str = r#"#default whitespace handling (Collapse or Preserve)
3332whitespace = "Collapse"
3333default_set = "urn:stam-fromhtml"
3334
3335[namespaces]
3336#this defines the namespace prefixes you can use in this configuration
3337xml = "http://www.w3.org/XML/1998/namespace"
3338html = "http://www.w3.org/1999/xhtml"
3339xsd =  "http://www.w3.org/2001/XMLSchema"
3340xlink = "http://www.w3.org/1999/xlink"
3341
3342# elements and attributes are matched in reverse-order, so put more generic statements before more specific ones
3343
3344#Define some base elements that we reuse later for actual elements (prevents unnecessary repetition)
3345[baseelements.common]
3346id = "{% if ?.@xml:id %}{{ @xml:id }}{% endif %}"
3347
3348    [[baseelements.common.annotationdata]]
3349    key = "type"
3350    value = "{{ localname }}"
3351
3352    [[baseelements.common.annotationdata]]
3353    key = "lang"
3354    value = "{{ @xml:lang }}"
3355    skip_if_missing = true
3356
3357    [[baseelements.common.annotationdata]]
3358    key = "n"
3359    value = "{{ @n }}"
3360    skip_if_missing = true
3361    valuetype = "int"
3362
3363    [[baseelements.common.annotationdata]]
3364    key = "nstring"
3365    value = "{{ @n }}"
3366    skip_if_missing = true
3367    valuetype = "string"
3368
3369    [[baseelements.common.annotationdata]]
3370    key = "style"
3371    value = "{{ @style }}"
3372    skip_if_missing = true
3373
3374    [[baseelements.common.annotationdata]]
3375    key = "class"
3376    value = "{{ @class }}"
3377    skip_if_missing = true
3378
3379    [[baseelements.common.annotationdata]]
3380    key = "src"
3381    value = "{{ @src }}"
3382    skip_if_missing = true
3383
3384[baseelements.text]
3385text = true
3386
3387
3388[[elements]]
3389base = [ "text", "common" ]
3390path = "*"
3391text = true
3392annotation = "TextSelector"
3393
3394# Pass through the following elements without mapping to text
3395[[elements]]
3396base = [ "common" ]
3397path = "//html:head"
3398
3399[[elements]]
3400base = [ "common" ]
3401path = "//html:head//*"
3402
3403# Map metadata like <meta name="key" content="value"> to annotations with key->value data selecting the resource (ResourceSelector)
3404[[elements]]
3405base = [ "common" ]
3406path = "//html:head//html:meta"
3407
3408[[elements.annotationdata]]
3409key = "{% if ?.@name %}{{ name }}{% endif %}"
3410value = "{% if ?.@content %}{{ @content }}{% endif %}"
3411skip_if_missing = true
3412
3413# By default, ignore any tags in the head (unless they're mentioned specifically later in the config)
3414[[elements]]
3415path = "//html:head/html:title"
3416annotation = "ResourceSelector"
3417
3418[[elements.annotationdata]]
3419key = "title"
3420value = "{{ $. | trim }}"
3421
3422
3423# Determine how various structural elements are converted to text
3424
3425[[elements]]
3426base = [ "common" ]
3427path = "//html:br"
3428textsuffix = "\n"
3429
3430[[elements]]
3431base = [ "common", "text" ]
3432path = "//html:p"
3433textprefix = "\n"
3434textsuffix = "\n"
3435annotation = "TextSelector"
3436
3437# Let's do headers and bulleted lists like markdown
3438[[elements]]
3439base = [ "common", "text" ]
3440path = "//html:h1"
3441textsuffix = "\n"
3442annotation = "TextSelector"
3443id = "h1"
3444
3445[[elements]]
3446base = [ "common", "text" ]
3447path = "//html:body//html:h2"
3448textsuffix = "\n"
3449annotation = "TextSelector"
3450id = "h2"
3451
3452#Generic, will be overriden by more specific one
3453[[elements]]
3454base = [ "common", "text" ]
3455path = "//html:li"
3456textprefix = "- "
3457textsuffix = "\n"
3458
3459[[elements]]
3460base = [ "common", "text" ]
3461path = """//html:body"""
3462annotation = "TextSelector"
3463id = "body"
3464
3465    [[elements.annotationdata]]
3466    key = "title_from_parent"
3467    value = "{{ $../html:head/html:title }}"
3468    skip_if_missing = true
3469
3470    [[elements.annotationdata]]
3471    key = "title_from_root"
3472    value = "{{ $/html:html/html:head/html:title }}"
3473    skip_if_missing = true
3474
3475    [[elements.annotationdata]]
3476    key = "firstfruit"
3477    value = """{{ $./html:p/html:ul/html:li }}"""
3478    skip_if_missing = true
3479
3480    [[elements.annotationdata]]
3481    key = "fruits"
3482    value = """{{ $$./html:p/html:ul/html:li }}"""
3483    skip_if_missing = true
3484
3485    [[elements.annotationdata]]
3486    key = "multifruits"
3487    value = """{{ $$./html:p/html:ul/html:li }}"""
3488    skip_if_missing = true
3489    multiple = true
3490
3491#More specific one takes precendence over the above generic one
3492[[elements]]
3493base = [ "common", "text" ]
3494path = """//html:ul[@class="fruits"]/html:li"""
3495textprefix = "* "
3496textsuffix = "\n"
3497
3498#Not real HTML, test-case modelled after TEI space
3499[[elements]]
3500base = [ "common" ]
3501path = """//html:space[@dim="vertical" and @unit="lines"]"""
3502text = true
3503textsuffix = """\n{% for x in @quantity | int | as_range %}\n{% endfor %}"""
3504
3505
3506[[elements]]
3507base = [ "common", "text" ]
3508path = "//html:example"
3509annotation = "TextSelector"
3510
3511[[elements.annotationdata]]
3512key = "requiredattrib"
3513value = "{{ @requiredattrib }}"
3514
3515[[elements.annotationdata]]
3516key = "optattrib"
3517value = "{{ ?.@optattrib }}"
3518
3519[[elements]]
3520base = [ "common","text" ]
3521path = "//html:marquee"
3522annotation = "TextSelector"
3523
3524#map value, some bogus data to test parsing
3525[[elements.annotationdata]]
3526key = "map"
3527
3528[elements.annotationdata.value]
3529text = "{{ $. }}"
3530number = 42
3531bogus = true
3532
3533[[metadata]]
3534id = "metadata"
3535
3536[[metadata.annotationdata]]
3537key = "author"
3538value = "proycon"
3539"#;
3540
3541    const XMLREQATTRIBEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3542<body><example xml:id="ann1" requiredattrib="blah">test</example></body></html>"#;
3543
3544    const XMLREQATTRIBEXAMPLE2: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3545<body><example xml:id="ann1">test</example></body></html>"#;
3546
3547    const XMLREQATTRIBEXAMPLE3: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3548<body><example xml:id="ann1" requiredattrib="blah" optattrib="blah">test</example></body></html>"#;
3549
3550    const XMLMAPEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3551<body><marquee xml:id="ann1">test</marquee></body></html>"#;
3552
3553    #[test]
3554    fn test_precompile_template_nochange() -> Result<(), String> {
3555        let config = XmlConversionConfig::new();
3556        let mut conv = XmlToStamConverter::new(&config);
3557        let template_in = "{{ foo }}";
3558        let template_out = conv.precompile(template_in);
3559        assert_eq!( template_out, template_in);
3560        //foo is not a special variable
3561        assert!(!conv.variables.get(template_in).as_ref().unwrap().contains("foo"));
3562        Ok(())
3563    }
3564
3565    #[test]
3566    fn test_precompile_template_attrib() -> Result<(), String> {
3567        let config = XmlConversionConfig::new();
3568        let mut conv = XmlToStamConverter::new(&config);
3569        let template_in = "{{ @foo }}";
3570        let template_out = conv.precompile(template_in);
3571        assert_eq!(template_out, "{{ ATTRIB_foo }}");
3572        //foo is an attribute so is returned
3573        assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@foo"));
3574        Ok(())
3575    }
3576
3577    #[test]
3578    fn test_precompile_template_attrib_ns() -> Result<(), String> {
3579        let config = XmlConversionConfig::new();
3580        let mut conv = XmlToStamConverter::new(&config);
3581        let template_in = "{{ @bar:foo }}";
3582        let template_out = conv.precompile(template_in);
3583        assert_eq!(template_out, "{{ ATTRIB_bar__foo }}");
3584        //foo is an attribute so is returned
3585        assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@bar:foo"));
3586        Ok(())
3587    }
3588
3589    #[test]
3590    fn test_precompile_template_element() -> Result<(), String> {
3591        let config = XmlConversionConfig::new();
3592        let mut conv = XmlToStamConverter::new(&config);
3593        let template_in = "{{ $foo }}";
3594        let template_out = conv.precompile(template_in);
3595        assert_eq!(template_out, "{{ ELEMENT_foo }}");
3596        //foo is an element so is returned
3597        assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$foo"));
3598        Ok(())
3599    }
3600
3601    #[test]
3602    fn test_precompile_template_element_ns() -> Result<(), String> {
3603        let config = XmlConversionConfig::new();
3604        let mut conv = XmlToStamConverter::new(&config);
3605        let template_in = "{{ $bar:foo }}";
3606        let template_out = conv.precompile(template_in);
3607        assert_eq!(template_out, "{{ ELEMENT_bar__foo }}");
3608        //foo is an element so is returned
3609        assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$bar:foo"));
3610        Ok(())
3611    }
3612
3613    #[test]
3614    fn test_precompile_template_this_text() -> Result<(), String> {
3615        let config = XmlConversionConfig::new();
3616        let mut conv = XmlToStamConverter::new(&config);
3617        let template_in = "{{ $. }}";
3618        let template_out = conv.precompile(template_in);
3619        assert_eq!(template_out, "{{ ELEMENT_THIS }}");
3620        assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$."));
3621        Ok(())
3622    }
3623
3624    #[test]
3625    fn test_precompile_template_parent_text() -> Result<(), String> {
3626        let config = XmlConversionConfig::new();
3627        let mut conv = XmlToStamConverter::new(&config);
3628        let template_in = "{{ $.. }}";
3629        let template_out = conv.precompile(template_in);
3630        assert_eq!(template_out, "{{ ELEMENT_PARENT }}");
3631        assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$.."));
3632        Ok(())
3633    }
3634
3635    #[test]
3636    fn test_precompile_template_elements() -> Result<(), String> {
3637        let config = XmlConversionConfig::new();
3638        let mut conv = XmlToStamConverter::new(&config);
3639        let template_in = "{{ $$foo }}";
3640        let template_out = conv.precompile(template_in);
3641        assert_eq!(template_out, "{{ ELEMENTS_foo }}");
3642        assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$$foo"));
3643        Ok(())
3644    }
3645
3646    #[test]
3647    fn test_precompile_template_elements_ns() -> Result<(), String> {
3648        let config = XmlConversionConfig::new();
3649        let mut conv = XmlToStamConverter::new(&config);
3650        let template_in = "{{ $$bar:foo }}";
3651        let template_out = conv.precompile(template_in);
3652        assert_eq!(template_out, "{{ ELEMENTS_bar__foo }}");
3653        assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$$bar:foo"));
3654        Ok(())
3655    }
3656
3657
3658    #[test]
3659    fn test_precompile_template_attrib2() -> Result<(), String> {
3660        let config = XmlConversionConfig::new();
3661        let mut conv = XmlToStamConverter::new(&config);
3662        let template_in = "{% for x in @foo %}";
3663        let template_out = conv.precompile(template_in);
3664        assert_eq!(template_out, "{% for x in ATTRIB_foo %}");
3665        //foo is an attribute so is returned
3666        assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@foo"));
3667        Ok(())
3668    }
3669
3670    #[test]
3671    fn test_precompile_template_attrib3() -> Result<(), String> {
3672        let config = XmlConversionConfig::new();
3673        let mut conv = XmlToStamConverter::new(&config);
3674        let template_in = "{{ ?.@foo }}";
3675        let template_out = conv.precompile(template_in);
3676        assert_eq!(template_out, "{{ ?.ATTRIB_foo }}");
3677        assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@foo"));
3678        Ok(())
3679    }
3680
3681    #[test]
3682    fn test_precompile_template_path() -> Result<(), String> {
3683        let config = XmlConversionConfig::new();
3684        let mut conv = XmlToStamConverter::new(&config);
3685        let template_in = "{{ $x/y/z/@a }}";
3686        let template_out = conv.precompile(template_in);
3687        assert_eq!(template_out, "{{ ELEMENT_x_IN_y_IN_z_IN_ATTRIB_a }}");
3688        assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$x/y/z/@a"));
3689        Ok(())
3690    }
3691
3692    #[test]
3693    fn test_loadconfig() -> Result<(), String> {
3694        let config = XmlConversionConfig::from_toml_str(CONF)?;
3695        let mut conv = XmlToStamConverter::new(&config);
3696        conv.compile().map_err(|e| format!("{}",e))?;
3697        assert_eq!(conv.config.namespaces.len(),4 , "number of namespaces");
3698        assert_eq!(conv.config.elements.len(), 15, "number of elements");
3699        assert_eq!(conv.config.baseelements.len(), 2, "number of baseelements");
3700        assert_eq!(conv.config.elements.get(0).unwrap().annotationdata.len(), 7,"number of annotationdata under first element");
3701        assert_eq!(conv.config.baseelements.get("common").unwrap().annotationdata.len(), 7,"number of annotationdata under baseelement common");
3702        Ok(())
3703    }
3704
3705    #[test]
3706    fn test_small() -> Result<(), String> {
3707        let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3708        let mut store = stam::AnnotationStore::new(stam::Config::new());
3709        from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;
3710        let res = store.resource("test").expect("resource must have been created at this point");
3711        assert_eq!(res.text(), "TEST\n\nThis is a test.\n", "resource text");
3712        assert_eq!(store.annotations_len(), 7, "number of annotations");
3713        let annotation = store.annotation("emphasis").expect("annotation must have been created at this point");
3714        assert_eq!(annotation.text_simple(), Some("test"));
3715        //eprintln!("DEBUG: {:?}",annotation.data().collect::<Vec<_>>());
3716        let key = store.key("urn:stam-fromhtml", "style").expect("key must exist");
3717        assert_eq!(annotation.data().filter_key(&key).value_as_str(), Some("color:green"));
3718        let key = store.key("urn:stam-fromhtml", "title").expect("key must exist");
3719        let annotation = res.annotations_as_metadata().filter_key(&key).next().expect("annotation");
3720        assert_eq!(annotation.data().filter_key(&key).value_as_str(), Some("test"));
3721        let bodyannotation = store.annotation("body").expect("body annotation not found");
3722        let title1 = store.key("urn:stam-fromhtml", "title_from_parent").expect("key must exist");
3723        let title2 = store.key("urn:stam-fromhtml", "title_from_root").expect("key must exist");
3724        assert_eq!(bodyannotation.data().filter_key(&title1).value_as_str(), Some("test"));
3725        assert_eq!(bodyannotation.data().filter_key(&title2).value_as_str(), Some("test"));
3726        Ok(())
3727    }
3728
3729    #[test]
3730    fn test_full() -> Result<(), String> {
3731        let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3732        let mut store = stam::AnnotationStore::new(stam::Config::new());
3733        from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3734        let res = store.resource("test").expect("resource must have been created at this point");
3735        assert_eq!(res.text(), XMLEXAMPLE_TEXTOUTPUT, "resource text");
3736        Ok(())
3737    }
3738
3739    #[test]
3740    fn test_firstfruit() -> Result<(), String> {
3741        let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3742        let mut store = stam::AnnotationStore::new(stam::Config::new());
3743        from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3744        let bodyannotation = store.annotation("body").expect("body annotation not found");
3745        let fruit = store.key("urn:stam-fromhtml", "firstfruit").expect("key must exist");
3746        assert_eq!(bodyannotation.data().filter_key(&fruit).value_as_str(), Some("apple") );
3747        Ok(())
3748    }
3749
3750    #[test]
3751    fn test_fruits() -> Result<(), String> {
3752        let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3753        let mut store = stam::AnnotationStore::new(stam::Config::new());
3754        from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3755        let bodyannotation = store.annotation("body").expect("body annotation not found");
3756        let fruits = store.key("urn:stam-fromhtml", "fruits").expect("key must exist");
3757        assert_eq!(bodyannotation.data().filter_key(&fruits).value(), Some(&DataValue::List(vec!("apple".into(),"banana".into(),"melon".into()) )));
3758        Ok(())
3759    }
3760
3761    #[test]
3762    fn test_multifruits() -> Result<(), String> {
3763        let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3764        let mut store = stam::AnnotationStore::new(stam::Config::new());
3765        from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3766        let bodyannotation = store.annotation("body").expect("body annotation not found");
3767        let fruits = store.key("urn:stam-fromhtml", "multifruits").expect("key must exist");
3768        let results: Vec<_> = bodyannotation.data().filter_key(&fruits).collect();
3769        assert_eq!(results.len(), 3);
3770        assert_eq!(results.get(0).unwrap().value(),&DataValue::String("apple".to_string()) );
3771        assert_eq!(results.get(1).unwrap().value(),&DataValue::String("banana".to_string()) );
3772        assert_eq!(results.get(2).unwrap().value(),&DataValue::String("melon".to_string()) );
3773        Ok(())
3774    }
3775
3776    #[test]
3777    fn test_teispace() -> Result<(), String> {
3778        let config = XmlConversionConfig::from_toml_str(CONF)?;
3779        let mut store = stam::AnnotationStore::new(stam::Config::new());
3780        from_xml_in_memory("test", XMLTEISPACE, &config, &mut store)?;
3781        let res = store.resource("test").expect("resource must have been created at this point");
3782        assert_eq!(res.text(), "\n\n\n\n", "resource text");
3783        Ok(())
3784    }
3785
3786
3787    #[test]
3788    fn test_reqattrib() -> Result<(), String> {
3789        let config = XmlConversionConfig::from_toml_str(CONF)?;
3790        let mut store = stam::AnnotationStore::new(stam::Config::new());
3791        from_xml_in_memory("test", XMLREQATTRIBEXAMPLE, &config, &mut store)?;
3792        let res = store.resource("test").expect("resource must have been created at this point");
3793        assert_eq!(res.text(), "test", "resource text");
3794        let key = store.key("urn:stam-fromhtml", "requiredattrib").expect("key must exist");
3795        let annotation = store.annotation("ann1").expect("annotation");
3796        assert_eq!(annotation.data().filter_key(&key).value_as_str(), Some("blah"));
3797        assert!(store.key("urn:stam-fromhtml", "optattrib").is_none(), "optional attrib is unused");
3798        Ok(())
3799    }
3800
3801    #[test]
3802    fn test_reqattrib2() -> Result<(), String> {
3803        let mut config = XmlConversionConfig::from_toml_str(CONF)?;
3804        config = config.with_debug(true);
3805        let mut store = stam::AnnotationStore::new(stam::Config::new());
3806        assert!(from_xml_in_memory("test", XMLREQATTRIBEXAMPLE2, &config, &mut store).is_err(), "checking if error is returned");
3807        Ok(())
3808    }
3809
3810    #[test]
3811    fn test_reqattrib3() -> Result<(), String> {
3812        let config = XmlConversionConfig::from_toml_str(CONF)?;
3813        let mut store = stam::AnnotationStore::new(stam::Config::new());
3814        from_xml_in_memory("test", XMLREQATTRIBEXAMPLE3, &config, &mut store)?;
3815        let res = store.resource("test").expect("resource must have been created at this point");
3816        assert_eq!(res.text(), "test", "resource text");
3817        let reqkey = store.key("urn:stam-fromhtml", "requiredattrib").expect("key must exist");
3818        let optkey = store.key("urn:stam-fromhtml", "optattrib").expect("key optattrib must exist");
3819        let annotation = store.annotation("ann1").expect("annotation");
3820        assert_eq!(annotation.data().filter_key(&reqkey).value_as_str(), Some("blah"));
3821        assert_eq!(annotation.data().filter_key(&optkey).value_as_str(), Some("blah"));
3822        Ok(())
3823    }
3824
3825    #[test]
3826    fn test_map() -> Result<(), String> {
3827        let config = XmlConversionConfig::from_toml_str(CONF)?;
3828        let mut store = stam::AnnotationStore::new(stam::Config::new());
3829        from_xml_in_memory("test", XMLMAPEXAMPLE, &config, &mut store)?;
3830        let res = store.resource("test").expect("resource must have been created at this point");
3831        assert_eq!(res.text(), "test", "resource text");
3832        let key = store.key("urn:stam-fromhtml", "map").expect("key must exist");
3833        let annotation = store.annotation("ann1").expect("annotation");
3834        let data = annotation.data().filter_key(&key).value().expect("data must exist");
3835        if let DataValue::Map(data) = data {
3836            assert_eq!(data.get("text"), Some(&DataValue::String("test".into())));
3837            assert_eq!(data.get("number"), Some(&DataValue::Int(42)));
3838            assert_eq!(data.get("bogus"), Some(&DataValue::Bool(true)));
3839            assert_eq!(data.len(), 3);
3840        } else {
3841            assert!(false, "Data is supposed to be a map");
3842        }
3843        Ok(())
3844    }
3845
3846    #[test]
3847    fn test_metadata() -> Result<(), String> {
3848        let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3849        let mut store = stam::AnnotationStore::new(stam::Config::new());
3850        from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3851        let annotation = store.annotation("metadata").expect("annotation");
3852        let key = store.key("urn:stam-fromhtml", "author").expect("key must exist");
3853        let data = annotation.data().filter_key(&key).value().expect("data must exist");
3854        assert_eq!(data, &DataValue::String("proycon".into()));
3855        Ok(())
3856    }
3857
3858    #[test]
3859    fn test_datavalue_int() -> Result<(), String> {
3860        let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3861        let mut store = stam::AnnotationStore::new(stam::Config::new());
3862        from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;
3863        let annotation = store.annotation("p1").expect("annotation not found");
3864        let key = store.key("urn:stam-fromhtml", "n").expect("key must exist");
3865        assert_eq!(annotation.data().filter_key(&key).value(), Some(&DataValue::Int(1)));
3866        Ok(())
3867    }
3868
3869    #[test]
3870    fn test_datavalue_string() -> Result<(), String> {
3871        let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3872        let mut store = stam::AnnotationStore::new(stam::Config::new());
3873        from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;
3874        let annotation = store.annotation("p1").expect("annotation not found");
3875        let key = store.key("urn:stam-fromhtml", "nstring").expect("key must exist");
3876        assert_eq!(annotation.data().filter_key(&key).value(), Some(&DataValue::String("001".to_string())));
3877        Ok(())
3878    }
3879
3880    #[test]
3881    fn test_doubleslash_selector_root() -> Result<(), String> {
3882        let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3883        let mut store = stam::AnnotationStore::new(stam::Config::new());
3884        from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3885        assert!( store.annotation("h1").is_some());
3886        Ok(())
3887    }
3888
3889    #[test]
3890    fn test_doubleslash_selector_infix_none() -> Result<(), String> {
3891        let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3892        let mut store = stam::AnnotationStore::new(stam::Config::new());
3893        from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3894        assert!( store.annotation("h2").is_some());
3895        Ok(())
3896    }
3897
3898
3899}