1use std::borrow::Cow;
2use std::collections::{BTreeMap, HashMap, BTreeSet};
3use std::fmt::Display;
4use std::fs::read_to_string;
5use std::path::Path;
6use std::hash::{Hash,DefaultHasher,Hasher};
7use std::process::{Command, Stdio};
8use std::io::{ BufWriter, Write};
9
10use roxmltree::{Document, Node, NodeId, ParsingOptions};
11use serde::Deserialize;
12use stam::*;
13use toml;
14use upon::Engine;
15use std::fmt::Write as FmtWrite;
16use serde_json;
17
18const NS_XML: &str = "http://www.w3.org/XML/1998/namespace";
19const CONTEXT_ANNO: &str = "http://www.w3.org/ns/anno.jsonld";
20
21
/// Supplies the fallback value for [`XmlConversionConfig`]'s `default_set` field,
/// used both by serde and by `XmlConversionConfig::new()`.
fn default_set() -> String {
    String::from("urn:stam-fromxml")
}
25
26#[derive(Deserialize)]
27pub struct XmlConversionConfig {
29 #[serde(default)]
30 elements: Vec<XmlElementConfig>,
32
33 #[serde(default)]
34 baseelements: HashMap<String, XmlElementConfig>,
36
37 #[serde(default)]
38 namespaces: HashMap<String, String>,
40
41 #[serde(default = "XmlWhitespaceHandling::collapse")]
42 whitespace: XmlWhitespaceHandling,
44
45 #[serde(default)]
46 context: HashMap<String, toml::Value>,
48
49 #[serde(default)]
50 metadata: Vec<MetadataConfig>,
52
53 #[serde(default)]
54 inject_dtd: Option<String>,
56
57 #[serde(default = "default_set")]
58 default_set: String,
59
60 #[serde(default)]
61 id_prefix: Option<String>,
63
64 #[serde(default)]
65 id_strip_suffix: Vec<String>,
67
68 #[serde(default)]
69 provenance: bool,
71
72 #[serde(default)]
73 external_filters: Vec<ExternalFilter>,
74
75 #[serde(skip_deserializing)]
76 debug: bool,
77
78}
79
80impl XmlConversionConfig {
81 pub fn new() -> Self {
82 Self {
83 elements: Vec::new(),
84 baseelements: HashMap::new(),
85 namespaces: HashMap::new(),
86 context: HashMap::new(),
87 metadata: Vec::new(),
88 whitespace: XmlWhitespaceHandling::Collapse,
89 default_set: default_set(),
90 inject_dtd: None,
91 id_prefix: None,
92 id_strip_suffix: Vec::new(),
93 provenance: false,
94 external_filters: Vec::new(),
95 debug: false,
96 }
97 }
98
99 pub fn resolve_baseelements(&mut self) -> Result<(), XmlConversionError> {
100 let mut replace: Vec<(usize, XmlElementConfig)> = Vec::new();
101 for (i, element) in self.elements.iter().enumerate() {
102 let mut newelement = None;
103 for basename in element.base.iter().rev() {
104 if let Some(baseelement) = self.baseelements.get(basename) {
105 if newelement.is_none() {
106 newelement = Some(element.clone());
107 }
108 newelement
109 .as_mut()
110 .map(|newelement| newelement.update(baseelement));
111 } else {
112 return Err(XmlConversionError::ConfigError(format!(
113 "No such base element: {}",
114 basename
115 )));
116 }
117 }
118 if let Some(newelement) = newelement {
119 replace.push((i, newelement));
120 }
121 }
122 for (i, element) in replace {
123 self.elements[i] = element;
124 }
125 Ok(())
126 }
127
128 pub fn from_toml_str(tomlstr: &str) -> Result<Self, String> {
130 let mut config: Self = toml::from_str(tomlstr).map_err(|e| format!("{}", e))?;
131 config.resolve_baseelements().map_err(|e| format!("{}", e))?;
132 Ok(config)
133 }
134
135 pub fn with_debug(mut self, value: bool) -> Self {
136 self.debug = value;
137 self
138 }
139
140 pub fn with_provenance(mut self, value: bool) -> Self {
142 self.provenance = value;
143 self
144 }
145
146 pub fn with_prefix(mut self, prefix: impl Into<String>, namespace: impl Into<String>) -> Self {
148 self.namespaces.insert(prefix.into(), namespace.into());
149 self
150 }
151
152 pub fn with_id_prefix(mut self, prefix: impl Into<String>) -> Self {
154 self.id_prefix = Some(prefix.into());
155 self
156 }
157
158 pub fn with_id_strip_suffix(mut self, suffix: impl Into<String>) -> Self {
160 self.id_strip_suffix.push(suffix.into());
161 self
162 }
163
164 pub fn with_inject_dtd(mut self, dtd: impl Into<String>) -> Self {
166 self.inject_dtd = Some(dtd.into());
167 self
168 }
169
170 pub fn with_whitespace(mut self, handling: XmlWhitespaceHandling) -> Self {
172 self.whitespace = handling;
173 self
174 }
175
176 pub fn with_element<F>(mut self, expression: &str, setup: F) -> Self
178 where
179 F: Fn(XmlElementConfig) -> XmlElementConfig,
180 {
181 let expression = XPathExpression::new(expression);
182 let element = setup(XmlElementConfig::new(expression));
183 if self.debug {
184 eprintln!("[STAM fromxml] registered {:?}", element);
185 }
186 self.elements.push(element);
187 self
188 }
189
190 fn element_config(&self, node: Node, path: &NodePath) -> Option<&XmlElementConfig> {
192 for elementconfig in self.elements.iter().rev() {
193 if elementconfig.path.test(path, node, self) {
194 return Some(elementconfig);
195 }
196 }
197 None
198 }
199
200 pub fn add_context(&mut self, key: impl Into<String>, value: toml::Value) {
201 self.context.insert(key.into(), value);
202 }
203
204 pub fn debug(&self) -> bool {
205 self.debug
206 }
207}
208
209#[derive(Clone, Copy, Debug, PartialEq, Deserialize)]
210pub enum XmlWhitespaceHandling {
212 Unspecified,
214 Inherit,
216 Preserve,
218 Collapse,
220}
221
222impl Default for XmlWhitespaceHandling {
223 fn default() -> Self {
224 XmlWhitespaceHandling::Unspecified
225 }
226}
227
228impl XmlWhitespaceHandling {
229 fn collapse() -> Self {
230 XmlWhitespaceHandling::Collapse
231 }
232}
233
234#[derive(Debug, Clone, Deserialize, PartialEq, Copy, Default)]
235pub enum XmlAnnotationHandling {
236 #[default]
238 Unspecified,
239
240 None,
242
243 TextSelector,
245
246 ResourceSelector,
248
249 TextSelectorBetweenMarkers,
251}
252
253#[derive(Debug, Clone, Deserialize)]
254pub struct XmlElementConfig {
256 #[serde(default)]
259 path: XPathExpression,
260
261 #[serde(default)]
262 annotation: XmlAnnotationHandling,
263
264 #[serde(default)]
265 annotationdata: Vec<XmlAnnotationDataConfig>,
266
267 #[serde(default)]
269 textprefix: Option<String>,
270
271 #[serde(default)]
273 text: Option<bool>,
274
275 #[serde(default)]
277 textsuffix: Option<String>,
278
279 #[serde(default)]
281 annotatetextprefix: Vec<XmlAnnotationDataConfig>,
282
283 #[serde(default)]
285 annotatetextsuffix: Vec<XmlAnnotationDataConfig>,
286
287 #[serde(default)]
289 include_textprefix: Option<bool>,
290
291 #[serde(default)]
293 include_textsuffix: Option<bool>,
294
295 #[serde(default)]
297 base: Vec<String>,
298
299 #[serde(default)]
301 id: Option<String>,
302
303 #[serde(default)]
304 stop: Option<bool>,
306
307 #[serde(default)]
308 whitespace: XmlWhitespaceHandling,
310}
311
312impl XmlElementConfig {
313 fn new(expression: XPathExpression) -> Self {
314 Self {
315 path: expression,
316 stop: None,
317 whitespace: XmlWhitespaceHandling::Unspecified,
318 annotation: XmlAnnotationHandling::Unspecified,
319 annotationdata: Vec::new(),
320 base: Vec::new(),
321 id: None,
322 textprefix: None,
323 text: None,
324 textsuffix: None,
325 annotatetextprefix: Vec::new(),
326 annotatetextsuffix: Vec::new(),
327 include_textprefix: None,
328 include_textsuffix: None,
329 }
330 }
331
332 pub fn update(&mut self, base: &XmlElementConfig) {
333 if self.whitespace == XmlWhitespaceHandling::Unspecified
334 && base.whitespace != XmlWhitespaceHandling::Unspecified
335 {
336 self.whitespace = base.whitespace;
337 }
338 if self.annotation == XmlAnnotationHandling::Unspecified
339 && base.annotation != XmlAnnotationHandling::Unspecified
340 {
341 self.annotation = base.annotation;
342 }
343 if self.textprefix.is_none() && base.textprefix.is_some() {
344 self.textprefix = base.textprefix.clone();
345 }
346 if self.text.is_none() && base.text.is_some() {
347 self.text = base.text;
348 }
349 if self.textsuffix.is_none() && base.textsuffix.is_some() {
350 self.textsuffix = base.textsuffix.clone();
351 }
352 if self.id.is_none() && base.id.is_some() {
353 self.id = base.id.clone();
354 }
355 if self.stop.is_none() && base.stop.is_some() {
356 self.stop = base.stop;
357 }
358 for annotationdata in base.annotationdata.iter() {
359 if !self.annotationdata.contains(annotationdata) {
360 self.annotationdata.push(annotationdata.clone());
361 }
362 }
363 if self.annotatetextsuffix.is_empty() && !base.annotatetextsuffix.is_empty() {
364 self.annotatetextsuffix = base.annotatetextsuffix.clone();
365 }
366 if self.annotatetextprefix.is_empty() && !base.annotatetextprefix.is_empty() {
367 self.annotatetextprefix = base.annotatetextprefix.clone();
368 }
369 if self.include_textsuffix.is_none() {
370 self.include_textsuffix = base.include_textsuffix;
371 }
372 if self.include_textprefix.is_none() {
373 self.include_textprefix = base.include_textprefix;
374 }
375 }
376
377
378 pub fn with_stop(mut self, stop: bool) -> Self {
380 self.stop = Some(stop);
381 self
382 }
383
384 pub fn with_whitespace(mut self, handling: XmlWhitespaceHandling) -> Self {
386 self.whitespace = handling;
387 self
388 }
389
390 pub fn with_text(mut self, text: bool) -> Self {
391 self.text = Some(text);
392 self
393 }
394
395 pub fn with_base(mut self, iter: impl Iterator<Item = impl Into<String>>) -> Self {
396 self.base = iter.into_iter().map(|s| s.into()).collect();
397 self
398 }
399
400 pub fn without_text(mut self) -> Self {
401 self.text = None;
402 self
403 }
404
405 pub fn with_annotation(mut self, annotation: XmlAnnotationHandling) -> Self {
406 self.annotation = annotation;
407 self
408 }
409
410 fn hash(&self) -> usize {
412 self.path.0.as_ptr() as usize
413 }
414}
415
416impl PartialEq for XmlElementConfig {
417 fn eq(&self, other: &Self) -> bool {
418 self.hash() == other.hash()
419 }
420}
421
422#[derive(Debug, Clone, Deserialize, PartialEq)]
423pub struct XmlAnnotationDataConfig {
424 id: Option<String>,
426 set: Option<String>,
428 key: Option<String>,
430 value: Option<toml::Value>,
432
433 #[serde(default)]
435 valuetype: Option<String>,
436
437 #[serde(default)]
439 allow_empty_value: bool,
440
441 #[serde(default)]
443 skip_if_missing: bool,
444
445
446 #[serde(default)]
448 multiple: bool,
449}
450
451impl XmlAnnotationDataConfig {
452 pub fn with_id(mut self, id: impl Into<String>) -> Self {
453 self.id = Some(id.into());
454 self
455 }
456
457 pub fn with_set(mut self, set: impl Into<String>) -> Self {
458 self.set = Some(set.into());
459 self
460 }
461
462 pub fn with_key(mut self, key: impl Into<String>) -> Self {
463 self.key = Some(key.into());
464 self
465 }
466
467 pub fn with_value(mut self, value: impl Into<toml::Value>) -> Self {
468 self.value = Some(value.into());
469 self
470 }
471}
472
473#[derive(Debug, Clone, PartialEq, Deserialize)]
475struct XPathExpression(String);
476
477impl XPathExpression {
478 pub fn new(expression: impl Into<String>) -> Self {
479 Self(expression.into())
480 }
481
482 pub fn any() -> Self {
483 Self("*".into())
484 }
485
486 pub fn iter<'a>(
487 &'a self,
488 config: &'a XmlConversionConfig,
489 ) -> impl Iterator<Item = (Option<&'a str>, &'a str, Option<&'a str>)> {
490 self.0.trim_start_matches('/').split("/").map(|segment| {
491 let (prefix, name, condition) = Self::parse_segment(segment);
493 let namespace = if let Some(prefix) = prefix {
494 if let Some(namespace) = config.namespaces.get(prefix).map(|x| x.as_str()) {
495 Some(namespace)
496 } else {
497 panic!(
498 "XML namespace prefix not known in configuration: {}",
499 prefix
500 );
501 }
502 } else {
503 None
504 };
505 (namespace, name, condition)
506 })
507 }
508
509 fn test<'a, 'b>(&self, path: &NodePath<'a, 'b>, mut node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
511 let mut pathiter = path.components.iter().rev();
512 for (refns, refname, condition) in self.iter(config).collect::<Vec<_>>().into_iter().rev() {
513 if let Some(component) = pathiter.next() {
514 if refname != "*" && refname != "" {
518 if refns.is_none() != component.namespace.is_none() || component.namespace != refns || refname != component.tagname {
519 return false;
520 }
521 }
522 if let Some(condition) = condition {
523 if !self.test_condition(condition, node, config) {
524 return false;
525 }
526 }
527 if let Some(parent) = node.parent() {
528 node = parent;
529 }
530 } else {
531 if refname != "" {
532 return false;
533 }
534 }
535 }
536 true
540 }
541
542 fn test_condition<'a,'b>(&self, condition: &'a str, node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
543 for condition in condition.split(" and ") { if let Some(pos) = condition.find("!=") {
545 let var = &condition[..pos];
546 let right = condition[pos+2..].trim_matches('"');
547 if self.get_var(var, &node, config) == Some(right) {
548 return false;
549 }
550 } else if let Some(pos) = condition.find("=") {
551 let var = &condition[..pos];
552 let right = condition[pos+1..].trim_matches('"');
553 let value = self.get_var(var, &node, config);
554 if value != Some(right) {
555 return false;
556 }
557 } else {
558 let v = self.get_var(condition, &node, config);
560 if v.is_none() || v == Some("") {
561 return false;
562 }
563 }
564 }
565 true
569 }
570
571 fn get_var<'a,'b>(&self, var: &str, node: &Node<'a,'b>, config: &XmlConversionConfig) -> Option<&'a str> {
573 if var.starts_with("@") {
574 if let Some(pos) = var.find(":") {
575 let prefix = &var[1..pos];
576 if let Some(ns) = config.namespaces.get(prefix) {
577 let var = &var[pos+1..];
578 node.attribute((ns.as_str(),var))
579 } else {
580 None
581 }
582 } else {
583 node.attribute(&var[1..])
584 }
585 } else if var == "text()" {
586 node.text().map(|s|s.trim())
587 } else {
588 None
589 }
590 }
591
592 fn parse_segment<'a>(s: &'a str) -> (Option<&'a str>, &'a str, Option<&'a str>) {
594 let (name, condition) = if let (Some(begin), Some(end)) = (s.find("["), s.rfind("]")) {
595 (&s[..begin], Some(&s[begin + 1..end]))
596 } else {
597 (s, None)
598 };
599 if let Some((prefix, name)) = name.split_once(":") {
600 (Some(prefix), name, condition)
601 } else {
602 (None, name, condition)
603 }
604 }
605}
606
607
608
609impl Default for XPathExpression {
610 fn default() -> Self {
611 Self::any()
612 }
613}
614
/// One step in a [`NodePath`]: the expanded name of an element plus an optional index.
#[derive(Clone, Debug, PartialEq)]
struct NodePathComponent<'a, 'b> {
    /// Namespace of the element, if it has one.
    namespace: Option<&'a str>,
    /// Local tag name of the element.
    tagname: &'b str,
    /// Index rendered as `[index]` after the tag name when present.
    /// NOTE(review): presumably the 1-based position among same-named siblings — confirm.
    index: Option<usize>,
}
622
623#[derive(Clone, Debug, PartialEq, Default)]
624struct NodePath<'a, 'b> {
625 components: Vec<NodePathComponent<'a,'b>>,
626}
627
628impl<'a, 'b> Display for NodePath<'a, 'b> {
629 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
630 for component in self.components.iter() {
631 write!(f, "/")?;
632 if let Some(ns) = component.namespace {
633 if let Some(index) = component.index {
634 write!(f, "{{{}}}{}[{}]", ns, component.tagname, index)?;
635 } else {
636 write!(f, "{{{}}}{}", ns, component.tagname)?;
637 }
638 } else {
639 if let Some(index) = component.index {
640 write!(f, "{}[{}]", component.tagname, index)?;
641 } else {
642 write!(f, "{}", component.tagname)?;
643 }
644 }
645 }
646 Ok(())
647 }
648}
649
650impl<'a,'b> NodePath<'a,'b> {
651 fn add(&mut self, node: &Node<'a,'b>, index: Option<usize>) {
652 if node.tag_name().name() != "" {
653 self.components.push(
654 NodePathComponent {
655 namespace: node.tag_name().namespace(),
656 tagname: node.tag_name().name(),
657 index,
658 }
659 )
660 }
661 }
662
663 fn format_as_xpath(&self, prefixes: &HashMap<String, String>) -> String {
664 let mut out = String::new();
665 for component in self.components.iter() {
666 out.push('/');
667 if let Some(ns) = component.namespace {
668 if let Some(prefix) = prefixes.get(ns) {
669 if let Some(index) = component.index {
670 out += &format!("{}:{}[{}]", prefix, component.tagname, index);
671 } else {
672 out += &format!("{}:{}", prefix, component.tagname);
673 }
674 } else {
675 eprintln!("STAM fromxml WARNING: format_as_xpath: namespace {} not defined, no prefix found!", ns);
676 if let Some(index) = component.index {
677 out += &format!("{}[{}]", component.tagname, index);
678 } else {
679 out += &format!("{}", component.tagname);
680 }
681 }
682 } else {
683 if let Some(index) = component.index {
684 out += &format!("{}[{}]", component.tagname, index);
685 } else {
686 out += &format!("{}", component.tagname);
687 }
688 }
689 }
690 out
691 }
692}
693
694
/// Keeps a running tally per tag name.
#[derive(Default, Debug)]
struct SiblingCounter {
    /// Number of times each tag name (in its debug rendering) has been counted.
    map: HashMap<String, usize>,
}
700
701impl SiblingCounter {
702 fn count<'a,'b>(&mut self, node: &Node<'a,'b>) -> usize {
703 let s = format!("{:?}", node.tag_name());
704 *self.map.entry(s).and_modify(|c| {*c += 1;}).or_insert(1)
705 }
706}
707
708
709#[derive(Debug, Clone, Deserialize)]
710pub struct MetadataConfig {
712 #[serde(default)]
714 annotation: XmlAnnotationHandling,
715
716 #[serde(default)]
717 annotationdata: Vec<XmlAnnotationDataConfig>,
718
719 #[serde(default)]
721 id: Option<String>,
722}
723
724pub fn from_xml<'a>(
726 filename: &Path,
727 config: &XmlConversionConfig,
728 store: &'a mut AnnotationStore,
729) -> Result<(), String> {
730 if config.debug {
731 eprintln!("[STAM fromxml] parsing {}", filename.display());
732 }
733
734 let mut xmlstring = read_to_string(filename)
736 .map_err(|e| format!("Error opening XML file {}: {}", filename.display(), e))?;
737
738 if xmlstring[..100].find("<!DOCTYPE html>").is_some() && config.inject_dtd.is_some() {
740 xmlstring = xmlstring.replacen("<!DOCTYPE html>", "", 1);
741 }
742
743 if xmlstring[..100].find("<!DOCTYPE").is_none() {
745 if let Some(dtd) = config.inject_dtd.as_ref() {
746 xmlstring = dtd.to_string() + &xmlstring
747 };
748 } else if config.inject_dtd.is_some() {
749 eprintln!("[STAM fromxml] WARNING: Can not inject DTD because file already has a DOCTYPE");
750 }
751
752 let doc = Document::parse_with_options(
754 &xmlstring,
755 ParsingOptions {
756 allow_dtd: true,
757 ..ParsingOptions::default()
758 },
759 )
760 .map_err(|e| format!("Error parsing XML file {}: {}", filename.display(), e))?;
761
762 let mut converter = XmlToStamConverter::new(config);
763 converter
764 .compile()
765 .map_err(|e| format!("Error compiling templates: {}", e))?;
766
767 let textoutfilename = format!(
768 "{}.txt",
769 filename
770 .file_stem()
771 .expect("invalid filename")
772 .to_str()
773 .expect("invalid utf-8 in filename")
774 );
775
776 let mut path = NodePath::default();
778 path.add(&doc.root_element(), None);
779 converter
780 .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(textoutfilename.as_str()), Some(&filename.to_string_lossy()), 0)
781 .map_err(|e| {
782 format!(
783 "Error extracting element text from {}: {}",
784 filename.display(),
785 e
786 )
787 })?;
788 if config.debug {
789 eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
790 }
791 let resource = TextResourceBuilder::new()
792 .with_id(filename_to_id(textoutfilename.as_str(), config).to_string())
793 .with_text(converter.text.clone())
794 .with_filename(&textoutfilename);
795
796 converter.resource_handle = Some(
797 store
798 .add_resource(resource)
799 .map_err(|e| format!("Failed to add resource {}: {}", &textoutfilename, e))?,
800 );
801
802 converter.add_metadata(store).map_err(|e| format!("Failed to add metadata {}: {}", &textoutfilename, e))?;
803
804 converter
806 .extract_element_annotation(doc.root_element(), &path, Some(&filename.to_string_lossy()),0, store)
807 .map_err(|e| {
808 format!(
809 "Error extracting element annotation from {}: {}",
810 filename.display(),
811 e
812 )
813 })?;
814
815 Ok(())
816}
817
818pub fn from_multi_xml<'a>(
820 filenames: &Vec<&Path>,
821 outputfile: Option<&Path>,
822 config: &XmlConversionConfig,
823 store: &'a mut AnnotationStore,
824) -> Result<(), String> {
825
826 let textoutfilename = if let Some(outputfile) = outputfile {
827 format!("{}",outputfile.to_str().expect("invalid utf-8 in filename"))
828 } else {
829 format!(
830 "{}.txt",
831 filenames.iter().next().expect("1 or more filename need to be provided")
832 .file_stem()
833 .expect("invalid filename")
834 .to_str()
835 .expect("invalid utf-8 in filename")
836 )
837 };
838
839 let mut xmlstrings: Vec<String> = Vec::new();
841 let mut docs: Vec<Document> = Vec::new();
842 for filename in filenames.iter() {
843 if config.debug {
844 eprintln!("[STAM fromxml] parsing {} (one of multiple)", filename.display());
845 }
846 let mut xmlstring = read_to_string(filename).map_err(|e| format!("Error opening XML file {}: {}", filename.display(), e))?;
848 if xmlstring[..100].find("<!DOCTYPE html>").is_some() && config.inject_dtd.is_some() {
849 xmlstring = xmlstring.replacen("<!DOCTYPE html>", "", 1);
850 }
851 if xmlstring[..100].find("<!DOCTYPE").is_none() {
853 if let Some(dtd) = config.inject_dtd.as_ref() {
854 xmlstring = dtd.to_string() + &xmlstring
855 };
856 } else if config.inject_dtd.is_some() {
857 eprintln!("[STAM fromxml] WARNING: Can not inject DTD because file already has a DOCTYPE");
858 }
859 xmlstrings.push(xmlstring);
860 }
861
862 for (filename, xmlstring) in filenames.iter().zip(xmlstrings.iter()) {
863 let doc = Document::parse_with_options(
865 xmlstring,
866 ParsingOptions {
867 allow_dtd: true,
868 ..ParsingOptions::default()
869 },
870 )
871 .map_err(|e| format!("Error parsing XML file {}: {}", filename.display(), e))?;
872 docs.push(doc);
873 }
874
875 let mut converter = XmlToStamConverter::new(config);
876 converter
877 .compile()
878 .map_err(|e| format!("Error compiling templates: {}", e))?;
879
880 for (i, (doc, filename)) in docs.iter().zip(filenames.iter()).enumerate() {
881 let mut path = NodePath::default();
882 path.add(&doc.root_element(), None);
883 converter
885 .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(textoutfilename.as_str()), Some(&filename.to_string_lossy()), i)
886 .map_err(|e| {
887 format!(
888 "Error extracting element text from {}: {}",
889 filename.display(),
890 e
891 )
892 })?;
893 if config.debug {
894 eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
895 }
896 }
897
898 let resource = TextResourceBuilder::new()
899 .with_id(filename_to_id(textoutfilename.as_str(), config).to_string())
900 .with_text(converter.text.clone())
901 .with_filename(&textoutfilename);
902
903 converter.resource_handle = Some(
904 store
905 .add_resource(resource)
906 .map_err(|e| format!("Failed to add resource {}: {}", &textoutfilename, e))?,
907 );
908
909 converter.add_metadata(store).map_err(|e| format!("Failed to add metadata {}: {}", &textoutfilename, e))?;
910
911 for (i,(doc, filename)) in docs.iter().zip(filenames.iter()).enumerate() {
913 let mut path = NodePath::default();
914 path.add(&doc.root_element(), None);
915 converter
916 .extract_element_annotation(doc.root_element(), &path, Some(&filename.to_string_lossy()),i, store)
917 .map_err(|e| {
918 format!(
919 "Error extracting element annotation from {}: {}",
920 filename.display(),
921 e
922 )
923 })?;
924 }
925
926 Ok(())
927}
928
929pub fn from_xml_in_memory<'a>(
931 resource_id: &str,
932 xmlstring: &str,
933 config: &XmlConversionConfig,
934 store: &'a mut AnnotationStore,
935) -> Result<(), String> {
936 if config.debug {
937 eprintln!("[STAM fromxml] parsing XML string");
938 }
939
940 let doc = Document::parse_with_options(
942 &xmlstring,
943 ParsingOptions {
944 allow_dtd: true,
945 ..ParsingOptions::default()
946 },
947 )
948 .map_err(|e| format!("Error parsing XML string: {}", e))?;
949
950 let mut converter = XmlToStamConverter::new(config);
951 converter
952 .compile()
953 .map_err(|e| format!("Error compiling templates: {}", e))?;
954
955 let mut path = NodePath::default();
956 path.add(&doc.root_element(), None);
957 converter
959 .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(resource_id), Some(resource_id), 0)
960 .map_err(|e| {
961 format!(
962 "Error extracting element text from {}: {}",
963 resource_id,
964 e
965 )
966 })?;
967 if config.debug {
968 eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
969 }
970 let resource = TextResourceBuilder::new()
971 .with_id(resource_id)
972 .with_text(converter.text.clone());
973
974 converter.resource_handle = Some(
975 store
976 .add_resource(resource)
977 .map_err(|e| format!("Failed to add resource {}: {}", &resource_id, e))?,
978 );
979
980 converter.add_metadata(store).map_err(|e| format!("Failed to add metadata for {}: {}", &resource_id, e))?;
981
982 converter
984 .extract_element_annotation(doc.root_element(), &path, Some(resource_id), 0, store)
985 .map_err(|e| {
986 format!(
987 "Error extracting element annotation from {}: {}",
988 resource_id,
989 e
990 )
991 })?;
992
993 Ok(())
994}
995
996pub fn filename_to_id<'a>(filename: &'a str, config: &XmlConversionConfig) -> &'a str {
997 for suffix in config.id_strip_suffix.iter() {
998 if filename.ends_with(suffix) {
999 return &filename[..filename.len() - suffix.len()];
1000 }
1001 }
1002 return filename;
1003}
1004
/// Distinguishes which part of an element's output a recorded position refers to;
/// it is the third component of the converter's position-map keys.
#[derive(Clone, Copy, PartialEq, Hash, Eq)]
enum PositionType {
    /// The element's own text.
    Body,
    /// The text prefix emitted for the element.
    TextPrefix,
    /// The text suffix emitted for the element.
    TextSuffix,
}
1011
1012struct XmlToStamConverter<'a> {
1013 cursor: usize,
1015
1016 text: String,
1018
1019 template_engine: Engine<'a>,
1021
1022 positionmap: HashMap<(usize,NodeId,PositionType), Offset>,
1024
1025 bytepositionmap: HashMap<(usize,NodeId,PositionType), (usize, usize)>,
1027
1028 markers: HashMap<usize, Vec<(usize,NodeId)>>,
1030
1031 resource_handle: Option<TextResourceHandle>,
1033
1034 pending_whitespace: bool,
1036
1037 config: &'a XmlConversionConfig,
1039
1040 prefixes: HashMap<String, String>,
1042
1043 global_context: BTreeMap<String, upon::Value>,
1045
1046 variables: BTreeMap<String, BTreeSet<&'a str>>,
1048
1049 debugindent: String,
1050}
1051
1052pub enum XmlConversionError {
1053 StamError(StamError),
1054 TemplateError(String, Option<upon::Error>),
1055 ConfigError(String),
1056}
1057
1058impl From<StamError> for XmlConversionError {
1059 fn from(error: StamError) -> Self {
1060 Self::StamError(error)
1061 }
1062}
1063
1064impl From<upon::Error> for XmlConversionError {
1065 fn from(error: upon::Error) -> Self {
1066 Self::TemplateError("".into(), Some(error))
1067 }
1068}
1069
1070impl Display for XmlConversionError {
1071 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1072 match self {
1073 Self::StamError(e) => e.fmt(f),
1074 Self::TemplateError(s, e) => {
1075 f.write_str(s.as_str())?;
1076 f.write_str(": ")?;
1077 if let Some(e) = e {
1078 e.fmt(f)?;
1079 }
1080 f.write_str("")
1081 }
1082 Self::ConfigError(e) => e.fmt(f),
1083 }
1084 }
1085}
1086
1087impl<'a> XmlToStamConverter<'a> {
1088 fn new(config: &'a XmlConversionConfig) -> Self {
1089 let mut prefixes: HashMap<String, String> = HashMap::new();
1090 for (prefix, namespace) in config.namespaces.iter() {
1091 prefixes.insert(namespace.to_string(), prefix.to_string());
1092 }
1093 let mut template_engine = Engine::new();
1094 template_engine.set_default_formatter(&value_formatter); template_engine.add_function("capitalize", filter_capitalize);
1096 template_engine.add_function("lower", str::to_lowercase);
1097 template_engine.add_function("upper", str::to_uppercase);
1098 template_engine.add_function("trim", |s: &str| s.trim().to_string() );
1099 template_engine.add_function("add", filter_add);
1100 template_engine.add_function("sub", filter_sub);
1101 template_engine.add_function("mul", filter_mul);
1102 template_engine.add_function("div", filter_div);
1103 template_engine.add_function("eq", |a: &upon::Value, b: &upon::Value| a == b);
1104 template_engine.add_function("ne", |a: &upon::Value, b: &upon::Value| a != b);
1105 template_engine.add_function("gt", filter_gt);
1106 template_engine.add_function("lt", filter_lt);
1107 template_engine.add_function("gte", filter_gte);
1108 template_engine.add_function("lte", filter_lte);
1109 template_engine.add_function("int", |a: &upon::Value| match a {
1110 upon::Value::Integer(x) => upon::Value::Integer(*x),
1111 upon::Value::Float(x) => upon::Value::Integer(*x as i64),
1112 upon::Value::String(s) => upon::Value::Integer(s.parse().expect("int filter expects an integer value")),
1113 _ => panic!("int filter expects an integer value"), });
1115 template_engine.add_function("float", |a: &upon::Value| match a {
1116 upon::Value::Float(_) => a.clone(),
1117 upon::Value::Integer(x) => upon::Value::Float(*x as f64),
1118 upon::Value::String(s) => upon::Value::Float(s.parse().expect("float filter expects a float value")),
1119 _ => panic!("int filter expects an integer value"), });
1121 template_engine.add_function("str", |a: upon::Value| match a {
1122 upon::Value::Integer(x) => upon::Value::String(format!("{}",x)),
1123 upon::Value::Float(x) => upon::Value::String(format!("{}",x)),
1124 upon::Value::Bool(x) => upon::Value::String(format!("{}",x)),
1125 upon::Value::String(_) => a,
1126 upon::Value::None => upon::Value::String(String::new()),
1127 upon::Value::List(list) => { let newlist: Vec<String> = list.iter().map(|v| match v {
1129 upon::Value::String(s) => s.clone(),
1130 upon::Value::Integer(d) => format!("{}",d),
1131 upon::Value::Float(d) => format!("{}",d),
1132 upon::Value::Bool(d) => format!("{}",d),
1133 _ => String::new(),
1134 }).collect();
1135 upon::Value::String(newlist.join(", "))
1136 },
1137 _ => panic!("map to string not implemented"), });
1139 template_engine.add_function("as_range", |a: i64| upon::Value::List(std::ops::Range { start: 0, end: a }.into_iter().map(|x| upon::Value::Integer(x+1)).collect::<Vec<_>>()) );
1140 template_engine.add_function("last", |list: &[upon::Value]| list.last().map(Clone::clone));
1141 template_engine.add_function("first", |list: &[upon::Value]| {
1142 list.first().map(Clone::clone)
1143 });
1144 template_engine.add_function("tokenize", |s: &str| {
1145 upon::Value::List(
1146 s.split(|c| c == ' ' || c == '\n').filter_map(|x|
1147 if !x.is_empty() {
1148 Some(upon::Value::String(x.to_string()))
1149 } else {
1150 None
1151 }
1152 )
1153 .collect::<Vec<upon::Value>>())
1154 });
1155 template_engine.add_function("replace", |s: &str, from: &str, to: &str| {
1156 upon::Value::String(s.replace(from,to))
1157 });
1158 template_engine.add_function("starts_with", |s: &str, prefix: &str| {
1159 s.starts_with(prefix)
1160 });
1161 template_engine.add_function("ends_with", |s: &str, suffix: &str| {
1162 s.ends_with(suffix)
1163 });
1164 template_engine.add_function("basename", |a: &upon::Value| match a {
1165 upon::Value::String(s) => upon::Value::String(s.split(|c| c == '/' || c == '\\').last().expect("splitting must work").to_string()),
1166 _ => panic!("basename filter expects a string value"), });
1168 template_engine.add_function("noext", |a: &upon::Value| match a {
1169 upon::Value::String(s) => if let Some(pos) = s.rfind('.') {
1170 s[..pos].to_string()
1171 } else {
1172 s.to_string()
1173 },
1174 _ => panic!("basename filter expects a string value"), });
1176 template_engine.add_function("join", |list: &upon::Value, delimiter: &str| match list {
1177 upon::Value::List(list) => { let newlist: Vec<String> = list.iter().map(|v| match v {
1179 upon::Value::String(s) => s.clone(),
1180 upon::Value::Integer(d) => format!("{}",d),
1181 upon::Value::Float(d) => format!("{}",d),
1182 upon::Value::Bool(d) => format!("{}",d),
1183 _ => String::new(),
1184 }).collect();
1185 upon::Value::String(newlist.join(delimiter))
1186 },
1187 _ => {
1188 list.clone() }
1190 });
1191 let mut converter = Self {
1192 cursor: 0,
1193 text: String::new(),
1194 template_engine,
1195 positionmap: HashMap::new(),
1196 bytepositionmap: HashMap::new(),
1197 markers: HashMap::new(),
1198 resource_handle: None,
1199 pending_whitespace: false,
1200 global_context: BTreeMap::new(),
1201 debugindent: String::new(),
1202 variables: BTreeMap::new(),
1203 prefixes,
1204 config,
1205 };
1206 converter.set_global_context();
1207 converter.add_external_filters();
1208 converter
1209 }
1210
1211 fn add_external_filters(&mut self) {
1212 for filter in self.config.external_filters.clone() {
1213 self.template_engine.add_function(filter.name.clone(), move |value: &upon::Value| filter.run(value) );
1214 }
1215 }
1216
1217 fn compile(&mut self) -> Result<(), XmlConversionError> {
1219 if self.config.debug {
1220 eprintln!("[STAM fromxml] compiling templates");
1221 }
1222 for element in self.config.elements.iter() {
1223 if let Some(textprefix) = element.textprefix.as_ref() {
1224 if self.template_engine.get_template(textprefix.as_str()).is_none() {
1225 let template = self.precompile(textprefix.as_str());
1226 self.template_engine
1227 .add_template(textprefix.clone(), template)
1228 .map_err(|e| {
1229 XmlConversionError::TemplateError(
1230 format!("element/textprefix template {}", textprefix.clone()),
1231 Some(e),
1232 )
1233 })?;
1234 }
1235 }
1236 if let Some(textsuffix) = element.textsuffix.as_ref() {
1237 if self.template_engine.get_template(textsuffix.as_str()).is_none() {
1238 let template = self.precompile(textsuffix.as_str());
1239 self.template_engine
1240 .add_template(textsuffix.clone(), template)
1241 .map_err(|e| {
1242 XmlConversionError::TemplateError(
1243 format!("element/textsuffix template {}", textsuffix.clone()),
1244 Some(e),
1245 )
1246 })?;
1247 }
1248 }
1249 if let Some(id) = element.id.as_ref() {
1250 if self.template_engine.get_template(id.as_str()).is_none() {
1251 let template = self.precompile(id.as_str());
1252 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1253 XmlConversionError::TemplateError(
1254 format!("element/id template {}", id.clone()),
1255 Some(e),
1256 )
1257 })?;
1258 }
1259 }
1260 for annotationdata in element.annotationdata.iter().chain(element.annotatetextprefix.iter()).chain(element.annotatetextsuffix.iter()) {
1261 if let Some(id) = annotationdata.id.as_ref() {
1262 if self.template_engine.get_template(id.as_str()).is_none() {
1263 let template = self.precompile(id.as_str());
1264 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1265 XmlConversionError::TemplateError(
1266 format!("annotationdata/id template {}", id.clone()),
1267 Some(e),
1268 )
1269 })?;
1270 }
1271 }
1272 if let Some(set) = annotationdata.set.as_ref() {
1273 if self.template_engine.get_template(set.as_str()).is_none() {
1274 let template = self.precompile(set.as_str());
1275 self.template_engine.add_template(set.clone(), template).map_err(|e| {
1277 XmlConversionError::TemplateError(
1278 format!("annotationdata/set template {}", set.clone()),
1279 Some(e),
1280 )
1281 })?;
1282 }
1283 }
1284 if let Some(key) = annotationdata.key.as_ref() {
1285 if self.template_engine.get_template(key.as_str()).is_none() {
1286 let template = self.precompile(key.as_str());
1287 self.template_engine.add_template(key.clone(), template).map_err(|e| {
1288 XmlConversionError::TemplateError(
1289 format!("annotationdata/key template {}", key.clone()),
1290 Some(e),
1291 )
1292 })?;
1293 }
1294 }
1295 if let Some(value) = annotationdata.value.as_ref() {
1296 self.compile_value(value)?;
1297 }
1298 }
1299 }
1300 for metadata in self.config.metadata.iter() {
1301 if let Some(id) = metadata.id.as_ref() {
1302 if self.template_engine.get_template(id.as_str()).is_none() {
1303 let template = self.precompile(id.as_str());
1304 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1305 XmlConversionError::TemplateError(
1306 format!("metadata/id template {}", id.clone()),
1307 Some(e),
1308 )
1309 })?;
1310 }
1311 }
1312 for annotationdata in metadata.annotationdata.iter() {
1313 if let Some(id) = annotationdata.id.as_ref() {
1314 if self.template_engine.get_template(id.as_str()).is_none() {
1315 let template = self.precompile(id.as_str());
1316 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1317 XmlConversionError::TemplateError(
1318 format!("annotationdata/id template {}", id.clone()),
1319 Some(e),
1320 )
1321 })?;
1322 }
1323 }
1324 if let Some(set) = annotationdata.set.as_ref() {
1325 if self.template_engine.get_template(set.as_str()).is_none() {
1326 let template = self.precompile(set.as_str());
1327 self.template_engine.add_template(set.clone(), template).map_err(|e| {
1329 XmlConversionError::TemplateError(
1330 format!("annotationdata/set template {}", set.clone()),
1331 Some(e),
1332 )
1333 })?;
1334 }
1335 }
1336 if let Some(key) = annotationdata.key.as_ref() {
1337 if self.template_engine.get_template(key.as_str()).is_none() {
1338 let template = self.precompile(key.as_str());
1339 self.template_engine.add_template(key.clone(), template).map_err(|e| {
1340 XmlConversionError::TemplateError(
1341 format!("annotationdata/key template {}", key.clone()),
1342 Some(e),
1343 )
1344 })?;
1345 }
1346 }
1347 if let Some(value) = annotationdata.value.as_ref() {
1348 self.compile_value(value)?;
1349 }
1350 }
1351 }
1352 Ok(())
1353 }
1354
1355 fn compile_value(&mut self, value: &'a toml::Value) -> Result<(), XmlConversionError> {
1357 match value {
1358 toml::Value::String(value) => {
1359 if self.template_engine.get_template(value.as_str()).is_none() {
1360 let template = self.precompile(value.as_str());
1361 self.template_engine.add_template(value.clone(), template).map_err(|e| {
1362 XmlConversionError::TemplateError(
1363 format!("annotationdata/value template {}", value.clone()),
1364 Some(e),
1365 )
1366 })?;
1367 }
1368 }
1369 toml::Value::Table(map) => {
1370 for (_key, value) in map.iter() {
1371 self.compile_value(value)?;
1372 }
1373 },
1374 toml::Value::Array(list) => {
1375 for value in list.iter() {
1376 self.compile_value(value)?;
1377 }
1378 }
1379 _ => {} }
1381 Ok(())
1382 }
1383
    /// Recursively extracts the text of `node` and its descendants into
    /// `self.text`, and records where this node's text ended up.
    ///
    /// `self.cursor` counts characters appended so far; byte positions are
    /// taken from `self.text.len()`. At the end, the character offset and the
    /// byte range of this node are stored in `positionmap` / `bytepositionmap`
    /// under `(doc_num, node.id(), PositionType::Body)`.
    ///
    /// `whitespace` is the handling inherited from the parent; it can be
    /// overridden by an `xml:space` attribute or by the matching element
    /// configuration. `resource_id`, `inputfile` and `doc_num` are passed on to
    /// the text prefix/suffix template rendering.
    ///
    /// # Errors
    ///
    /// Propagates errors from rendering the text prefix/suffix templates and
    /// from the recursive calls.
    fn extract_element_text<'b>(
        &mut self,
        node: Node<'a,'b>,
        path: &NodePath<'a,'b>,
        whitespace: XmlWhitespaceHandling,
        resource_id: Option<&str>,
        inputfile: Option<&str>,
        doc_num: usize,
    ) -> Result<(), XmlConversionError> {
        if self.config.debug {
            eprintln!("[STAM fromxml]{} extracting text for element {}", self.debugindent, path);
        }
        // Start of this element's span, in characters and in bytes.
        let mut begin = self.cursor;
        let mut bytebegin = self.text.len();
        // Number of trailing characters/bytes (an excluded text suffix) that are
        // subtracted from the end of the span.
        let mut end_discount = 0;
        let mut end_bytediscount = 0;
        // Guards the one-time shift of `begin` past an emitted pending space.
        // NOTE(review): this flag is only cleared inside the shifting branch
        // below, not when text or child content is emitted without a shift, so a
        // later text child could still move `begin` forward after content was
        // already output — confirm whether that is intended.
        let mut firsttext = true;
        let mut elder_siblings = SiblingCounter::default();

        if let Some(element_config) = self.config.element_config(node, path) {
            if self.config.debug {
                eprintln!("[STAM fromxml]{} matching config: {:?}", self.debugindent, element_config);
            }

            // Regular case: not stopped and not a marker element, so descend.
            if (element_config.stop == Some(false) || element_config.stop.is_none())
                && element_config.annotation != XmlAnnotationHandling::TextSelectorBetweenMarkers
            {
                // An xml:space attribute wins over the element configuration;
                // unknown attribute values keep the inherited handling.
                let whitespace = if node.has_attribute((NS_XML, "space")) {
                    match node.attribute((NS_XML, "space")).unwrap() {
                        "preserve" => XmlWhitespaceHandling::Preserve,
                        "collapse" | "replace" => XmlWhitespaceHandling::Collapse,
                        _ => whitespace,
                    }
                } else if element_config.whitespace == XmlWhitespaceHandling::Inherit
                    || element_config.whitespace == XmlWhitespaceHandling::Unspecified
                {
                    whitespace
                } else {
                    element_config.whitespace
                };

                self.process_textprefix(element_config, node, resource_id, inputfile, doc_num, &mut begin, &mut bytebegin)?;

                // Cursor position right after the prefix; handed to the suffix
                // template as the begin of the text.
                let textbegin = self.cursor;
                for child in node.children() {
                    if self.config.debug {
                        eprintln!("[STAM fromxml]{} child {:?}", self.debugindent, child);
                    }
                    if child.is_text() && element_config.text == Some(true) {
                        let mut innertext = child.text().expect("text node must have text");
                        // Whether this text node ends / starts with whitespace
                        // (only determined in collapse mode).
                        let mut pending_whitespace = false;
                        let mut leading_whitespace = false;
                        if whitespace == XmlWhitespaceHandling::Collapse && !innertext.is_empty() {
                            let mut all_whitespace = true;
                            leading_whitespace = innertext.chars().next().unwrap().is_whitespace();

                            // One pass over the characters: `inspect` clears
                            // `all_whitespace` on any non-whitespace character,
                            // `last` yields the final character.
                            pending_whitespace = innertext
                                .chars()
                                .inspect(|c| {
                                    if !c.is_whitespace() {
                                        all_whitespace = false
                                    }
                                })
                                .last()
                                .unwrap()
                                .is_whitespace();
                            if all_whitespace {
                                // Nothing to output; remember that a space is due.
                                self.pending_whitespace = true;
                                if self.config.debug {
                                    eprintln!(
                                        "[STAM fromxml]{} ^- all whitespace, flag pending whitespace and skipping...",
                                        self.debugindent,
                                    );
                                }
                                continue;
                            }
                            innertext = innertext.trim();
                            if self.config.debug {
                                eprintln!(
                                    "[STAM fromxml]{} ^- collapsed whitespace: {:?}",
                                    self.debugindent,
                                    innertext
                                );
                            }
                        }
                        if self.pending_whitespace || leading_whitespace {
                            // Emit a single separating space, but never at the very
                            // start of the text nor after existing whitespace.
                            if !self.text.is_empty()
                                && !self.text.chars().rev().next().unwrap().is_whitespace()
                            {
                                if self.config.debug {
                                    eprintln!("[STAM fromxml]{} ^- outputting pending whitespace",self.debugindent);
                                }
                                self.text.push(' ');
                                self.cursor += 1;
                                // A space carried over from earlier content is kept
                                // out of this element's span by moving its begin
                                // forward (the space is one byte).
                                if firsttext && self.pending_whitespace {
                                    begin += 1;
                                    bytebegin += 1;
                                    firsttext = false;
                                }
                            }
                            self.pending_whitespace = false;
                        }

                        if whitespace == XmlWhitespaceHandling::Collapse {
                            // Turn every whitespace character into a space, then
                            // drop each space that follows another space. `prevc`
                            // starts as a space, so a leading space is dropped too.
                            let mut prevc = ' ';
                            let mut innertext = innertext.replace(|c: char| c.is_whitespace(), " ");
                            innertext.retain(|c| {
                                let do_retain = c != ' ' || prevc != ' ';
                                prevc = c;
                                do_retain
                            });
                            self.text += &innertext;
                            self.cursor += innertext.chars().count();
                            if self.config.debug {
                                eprintln!("[STAM fromxml]{} ^- outputting text child (collapsed whitespace), cursor is now {}: {}",self.debugindent, self.cursor, innertext);
                            }
                        } else {
                            self.text += &innertext;
                            self.cursor += innertext.chars().count();
                            if self.config.debug {
                                eprintln!("[STAM fromxml]{} ^- outputting text child, cursor is now {}: {}",self.debugindent, self.cursor, innertext);
                            }
                        }
                        // Trailing whitespace of this text node is deferred to
                        // whatever text comes next.
                        self.pending_whitespace = pending_whitespace;
                    } else if child.is_element() {
                        if self.config.debug {
                            eprintln!("[STAM fromxml]{} \\- extracting text for this child", self.debugindent);
                        }
                        // NOTE(review): one push_str here but two pop() calls
                        // after the recursion — confirm the width of the pushed
                        // indent string.
                        self.debugindent.push_str(" ");
                        // Extend a copy of the path with the child and its count
                        // from the sibling counter.
                        let mut path = path.clone();
                        let count = elder_siblings.count(&child);
                        path.add(&child, Some(count));
                        self.extract_element_text(child, &path, whitespace, resource_id, inputfile, doc_num)?;
                        self.debugindent.pop();
                        self.debugindent.pop();
                    } else {
                        if self.config.debug {
                            eprintln!("[STAM fromxml]{} ^- skipping this child node", self.debugindent);
                        }
                        continue;
                    }
                }

                self.process_textsuffix(element_config, node, resource_id, inputfile, doc_num, &mut end_discount, &mut end_bytediscount, textbegin)?;
            } else if element_config.annotation == XmlAnnotationHandling::TextSelectorBetweenMarkers
            {
                // Marker element: children are not visited. The node is recorded
                // under the hash of its element configuration.
                if self.config.debug {
                    eprintln!("[STAM fromxml]{} adding to markers (textprefix={:?}, textsuffix={:?})", self.debugindent, element_config.textprefix, element_config.textsuffix);
                }

                self.markers
                    .entry(element_config.hash())
                    .and_modify(|v| v.push((doc_num, node.id())))
                    .or_insert(vec![(doc_num, node.id())]);

                self.process_textprefix(element_config, node, resource_id, inputfile, doc_num, &mut begin, &mut bytebegin)?;
                self.process_textsuffix(element_config, node, resource_id, inputfile, doc_num, &mut end_discount, &mut end_bytediscount, self.cursor)?;
            }
        } else if self.config.debug {
            eprintln!(
                "[STAM fromxml]{} WARNING: no match, skipping text extraction for element {}",
                self.debugindent,
                path
            );
        }

        // Record the span (possibly empty) covered by this element, leaving out
        // a discounted suffix.
        if begin <= (self.cursor - end_discount) {
            let offset = Offset::simple(begin, self.cursor - end_discount);
            if self.config.debug {
                eprintln!(
                    "[STAM fromxml]{} extracted text for {} @{:?}: {:?}",
                    self.debugindent,
                    path,
                    &offset,
                    &self.text[bytebegin..(self.text.len() - end_bytediscount)]
                );
            }
            self.positionmap.insert((doc_num, node.id(), PositionType::Body), offset);
            self.bytepositionmap
                .insert((doc_num, node.id(), PositionType::Body), (bytebegin, self.text.len() - end_bytediscount));
        }
        Ok(())
    }
1595
1596 fn process_textprefix<'b>(
1598 &mut self,
1599 element_config: &XmlElementConfig,
1600 node: Node<'a,'b>,
1601 resource_id: Option<&str>,
1602 inputfile: Option<&str>,
1603 doc_num: usize,
1604 begin: &mut usize,
1605 bytebegin: &mut usize
1606 ) -> Result<(), XmlConversionError> {
1607 if let Some(textprefix) = &element_config.textprefix {
1608 self.pending_whitespace = false;
1609 if self.config.debug {
1610 eprintln!("[STAM fromxml]{} outputting textprefix: {:?}", self.debugindent, textprefix);
1611 }
1612 let result =
1613 self.render_template(textprefix, &node, Some(self.cursor), None, resource_id, inputfile, doc_num)
1614 .map_err(|e| match e {
1615 XmlConversionError::TemplateError(s, e) => {
1616 XmlConversionError::TemplateError(
1617 format!(
1618 "whilst rendering textprefix template '{}' for node '{}': {}",
1619 textprefix, node.tag_name().name(), s
1620 ),
1621 e,
1622 )
1623 }
1624 e => e,
1625 })?;
1626 let result_charlen = result.chars().count();
1627
1628 if !element_config.annotatetextprefix.is_empty() {
1629 let offset = Offset::simple(self.cursor, self.cursor + result_charlen);
1631 self.positionmap.insert((doc_num, node.id(), PositionType::TextPrefix), offset);
1632 self.bytepositionmap
1633 .insert((doc_num, node.id(), PositionType::TextPrefix), (*bytebegin, *bytebegin + result.len()));
1634 }
1635
1636 self.cursor += result_charlen;
1637 self.text += &result;
1638
1639 if element_config.include_textprefix != Some(true) {
1640 *begin += result_charlen;
1642 *bytebegin += result.len();
1643 }
1644 }
1645 Ok(())
1646 }
1647
1648 fn process_textsuffix<'b>(
1650 &mut self,
1651 element_config: &XmlElementConfig,
1652 node: Node<'a,'b>,
1653 resource_id: Option<&str>,
1654 inputfile: Option<&str>,
1655 doc_num: usize,
1656 end_discount: &mut usize,
1657 end_bytediscount: &mut usize,
1658 textbegin: usize,
1659 ) -> Result<(), XmlConversionError> {
1660 if let Some(textsuffix) = &element_config.textsuffix {
1661 if self.config.debug {
1662 eprintln!("[STAM fromxml]{} outputting textsuffix: {:?}", self.debugindent, textsuffix);
1663 }
1664 let result = self.render_template(
1665 textsuffix.as_str(),
1666 &node,
1667 Some(textbegin),
1668 Some(self.cursor),
1669 resource_id,
1670 inputfile,
1671 doc_num
1672 ).map_err(|e| match e {
1673 XmlConversionError::TemplateError(s, e) => {
1674 XmlConversionError::TemplateError(
1675 format!(
1676 "whilst rendering textsuffix template '{}' for node '{}': {}",
1677 textsuffix,
1678 node.tag_name().name(),
1679 s
1680 ),
1681 e,
1682 )
1683 }
1684 e => e,
1685 })?;
1686 let end_discount_tmp = result.chars().count();
1687 let end_bytediscount_tmp = result.len();
1688
1689
1690 self.text += &result;
1691
1692 if !element_config.annotatetextsuffix.is_empty() {
1693 let offset = Offset::simple(self.cursor, self.cursor + end_discount_tmp);
1695 self.positionmap.insert((doc_num, node.id(), PositionType::TextSuffix), offset);
1696 self.bytepositionmap
1697 .insert((doc_num, node.id(), PositionType::TextSuffix), (self.text.len() - end_bytediscount_tmp, self.text.len()));
1698 }
1699
1700 self.cursor += end_discount_tmp;
1701 self.pending_whitespace = false;
1702
1703 if element_config.include_textsuffix == Some(true) {
1704 *end_discount = 0;
1706 *end_bytediscount = 0;
1707 } else {
1708 *end_discount = end_discount_tmp;
1710 *end_bytediscount = end_bytediscount_tmp;
1711 }
1712 }
1713 Ok(())
1714 }
1715
    /// Recursively turns `node` and its descendants into annotations in `store`,
    /// according to the matching element configuration.
    ///
    /// Uses the positions recorded earlier in `positionmap` / `bytepositionmap`
    /// for the node. Depending on the configured annotation handling, the
    /// annotation targets the node's text, the whole resource, or the text
    /// between this marker and the next. Child elements are visited unless the
    /// configuration sets `stop`.
    ///
    /// # Errors
    ///
    /// Returns a `ConfigError` when a text annotation is requested, no byte
    /// position is recorded for the node and no text was extracted at all;
    /// a `TemplateError` when the id template fails to render; and propagates
    /// errors from building annotation data and from `store.annotate`.
    ///
    /// # Panics
    ///
    /// Panics on an annotation handling other than `TextSelector`,
    /// `ResourceSelector` or `TextSelectorBetweenMarkers` (after `None` and
    /// `Unspecified` have been excluded).
    fn extract_element_annotation<'b>(
        &mut self,
        node: Node<'a,'b>,
        path: &NodePath<'a,'b>,
        inputfile: Option<&str>,
        doc_num: usize,
        store: &mut AnnotationStore,
    ) -> Result<(), XmlConversionError> {
        if self.config.debug {
            eprintln!("[STAM fromxml]{} extracting annotation from {}", self.debugindent, path);
        }

        let mut elder_siblings = SiblingCounter::default();

        if let Some(element_config) = self.config.element_config(node, &path) {
            if self.config.debug {
                eprintln!("[STAM fromxml]{} matching config: {:?}", self.debugindent, element_config);
            }
            if element_config.annotation != XmlAnnotationHandling::None
                && element_config.annotation != XmlAnnotationHandling::Unspecified
            {
                let mut builder = AnnotationBuilder::new();

                // Character offset recorded for this node, if any.
                let offset = self.positionmap.get(&(doc_num, node.id(), PositionType::Body));
                if element_config.annotation == XmlAnnotationHandling::TextSelector {
                    if let Some((beginbyte, endbyte)) = self.bytepositionmap.get(&(doc_num, node.id(), PositionType::Body)) {
                        if self.config.debug {
                            eprintln!("[STAM fromxml]{} annotation covers text {:?} (bytes {}-{})", self.debugindent, offset, beginbyte, endbyte);
                        }
                    } else if self.text.is_empty() {
                        return Err(XmlConversionError::ConfigError("Can't extract annotations on text if no text was extracted!".into()));
                    }
                }
                // Plain begin/end positions for the templates; only begin-aligned
                // cursors are translated, anything else yields None.
                let begin = if let Some(offset) = offset {
                    if let Cursor::BeginAligned(begin) = offset.begin {
                        Some(begin)
                    } else {
                        None
                    }
                } else {
                    None
                };
                let end = if let Some(offset) = offset {
                    if let Cursor::BeginAligned(end) = offset.end {
                        Some(end)
                    } else {
                        None
                    }
                } else {
                    None
                };

                let resource_id = if let Some(resource_handle) = self.resource_handle {
                    store.resource(resource_handle).unwrap().id()
                } else {
                    None
                };

                // Use the configured id template if it renders to something
                // non-empty.
                let mut have_id = false;
                if let Some(template) = &element_config.id {
                    let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
                    let compiled_template = self.template_engine.template(template.as_str());
                    let id = compiled_template.render(&context).to_string().map_err(|e|
                        XmlConversionError::TemplateError(
                            format!(
                                "whilst rendering id template '{}' for node '{}'",
                                template,
                                node.tag_name().name(),
                            ),
                            Some(e),
                        )
                    )?;
                    if !id.is_empty() {
                        builder = builder.with_id(id);
                        have_id = true;
                    }
                }

                // Otherwise fall back to a generated id, prefixed with the
                // resource id when there is one.
                if !have_id {
                    if let Some(resource_id) = resource_id {
                        builder = builder.with_id(stam::generate_id(&format!("{}-",resource_id), ""));
                    } else {
                        builder = builder.with_id(stam::generate_id("", ""));
                    }
                }

                builder = self.add_annotationdata_to_builder(element_config.annotationdata.iter(), builder, node.clone(), begin, end, resource_id, inputfile, doc_num)?;


                // Provenance: attach a "target" datum that points back at the
                // source XML node, by xml:id when present, else by its path.
                if self.config.provenance && inputfile.is_some() {
                    let path_string = if let Some(id) = node.attribute((NS_XML,"id")) {
                        format!("//{}[@xml:id=\"{}\"]", self.get_node_name_for_xpath(&node), id)
                    } else {
                        path.format_as_xpath(&self.prefixes)
                    };
                    let databuilder = AnnotationDataBuilder::new().with_dataset(CONTEXT_ANNO.into()).with_key("target".into()).with_value(
                        BTreeMap::from([
                            ("source".to_string(),inputfile.unwrap().into()),
                            ("selector".to_string(),
                                BTreeMap::from([
                                    ("type".to_string(),"XPathSelector".into()),
                                    ("value".to_string(),path_string.into())
                                ]).into()
                            )
                        ]).into()
                    );
                    builder = builder.with_data_builder(databuilder);
                }


                match element_config.annotation {
                    XmlAnnotationHandling::TextSelector => {
                        // Without a recorded position for the node no annotation
                        // is made for the element itself.
                        if let Some(selector) = self.textselector(node, doc_num, PositionType::Body) {
                            builder = builder.with_target(selector);
                            if self.config.debug {
                                eprintln!("[STAM fromxml] builder AnnotateText: {:?}", builder);
                            }
                            store.annotate(builder)?;
                        }
                        if !element_config.annotatetextprefix.is_empty() || !element_config.annotatetextsuffix.is_empty() {
                            self.annotate_textaffixes(node, element_config, inputfile, doc_num, store)?;
                        }
                    }
                    XmlAnnotationHandling::ResourceSelector => {
                        builder = builder.with_target(SelectorBuilder::ResourceSelector(
                            self.resource_handle.into(),
                        ));
                        if self.config.debug {
                            eprintln!("[STAM fromxml] builder AnnotateResource: {:?}", builder);
                        }
                        store.annotate(builder)?;
                    }
                    XmlAnnotationHandling::TextSelectorBetweenMarkers => {
                        // Here the affixes are only annotated when the marker
                        // selector itself could be built.
                        if let Some(selector) =
                            self.textselector_for_markers(node, doc_num, store, element_config)
                        {
                            builder = builder.with_target(selector);
                            if self.config.debug {
                                eprintln!(
                                    "[STAM fromxml] builder TextSelectorBetweenMarkers: {:?}",
                                    builder
                                );
                            }
                            store.annotate(builder)?;
                            if !element_config.annotatetextprefix.is_empty() || !element_config.annotatetextsuffix.is_empty() {
                                self.annotate_textaffixes(node, element_config, inputfile, doc_num, store)?;
                            }
                        }
                    }
                    _ => panic!(
                        "Invalid annotationhandling: {:?}",
                        element_config.annotation
                    ),
                }
            }

            // Descend into child elements unless the configuration says stop.
            if element_config.stop == Some(false) || element_config.stop.is_none() {
                for child in node.children() {
                    if child.is_element() {
                        // NOTE(review): one push_str here but two pop() calls
                        // after the recursion — confirm the width of the pushed
                        // indent string.
                        self.debugindent.push_str(" ");
                        let mut path = path.clone();
                        let count = elder_siblings.count(&child);
                        path.add(&child, Some(count));
                        self.extract_element_annotation(child, &path, inputfile, doc_num, store)?;
                        self.debugindent.pop();
                        self.debugindent.pop();
                    }
                }
            }
        } else {
            // This warning is printed regardless of the debug setting.
            eprintln!(
                "[STAM fromxml]{} WARNING: no match, skipping annotation extraction for element {}",
                self.debugindent,
                path
            );
        }
        Ok(())
    }
1909
1910 fn add_annotationdata_to_builder<'input>(&self, iter: impl Iterator<Item = &'a XmlAnnotationDataConfig>,
1911 mut builder: AnnotationBuilder<'a>,
1912 node: Node<'a, 'input>,
1913 begin: Option<usize>,
1914 end: Option<usize>,
1915 resource_id: Option<&str>,
1916 inputfile: Option<&str>,
1917 doc_num: usize,
1918 ) -> Result<AnnotationBuilder<'a>, XmlConversionError> {
1919 for annotationdata in iter {
1920 let mut databuilder = AnnotationDataBuilder::new();
1921 if let Some(template) = &annotationdata.set {
1922 let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1923 let compiled_template = self.template_engine.template(template.as_str());
1924 let dataset = compiled_template.render(&context).to_string().map_err(|e|
1925 XmlConversionError::TemplateError(
1926 format!(
1927 "whilst rendering annotationdata/dataset template '{}' for node '{}'",
1928 template,
1929 node.tag_name().name(),
1930 ),
1931 Some(e),
1932 )
1933 )?;
1934 if !dataset.is_empty() {
1935 databuilder = databuilder.with_dataset(dataset.into())
1936 }
1937 } else {
1938 databuilder =
1939 databuilder.with_dataset(self.config.default_set.as_str().into());
1940 }
1941 if let Some(template) = &annotationdata.key {
1942 let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1943 let compiled_template = self.template_engine.template(template.as_str());
1944 match compiled_template.render(&context).to_string().map_err(|e|
1945 XmlConversionError::TemplateError(
1946 format!(
1947 "whilst rendering annotationdata/key template '{}' for node '{}'",
1948 template,
1949 node.tag_name().name(),
1950 ),
1951 Some(e),
1952 )
1953 ) {
1954 Ok(key) if !key.is_empty() =>
1955 databuilder = databuilder.with_key(key.into()) ,
1956 Ok(_) if !annotationdata.skip_if_missing => {
1957 return Err(XmlConversionError::TemplateError(
1958 format!(
1959 "whilst rendering annotationdata/key template '{}' for node '{}'",
1960 template,
1961 node.tag_name().name(),
1962 ),
1963 None
1964 ));
1965 },
1966 Err(e) if !annotationdata.skip_if_missing => {
1967 return Err(e)
1968 },
1969 _ => {
1970 continue
1972 }
1973 }
1974 }
1975 if let Some(value) = &annotationdata.value {
1976 match self.extract_value(value, node, annotationdata.allow_empty_value, annotationdata.skip_if_missing, annotationdata.valuetype.as_ref().map(|s| s.as_str()), begin, end, resource_id, inputfile, doc_num)? {
1977 Some(DataValue::List(values)) if annotationdata.multiple => {
1978 for value in values {
1979 let mut databuilder_multi = databuilder.clone();
1980 databuilder_multi = databuilder_multi.with_value(value);
1981 builder = builder.with_data_builder(databuilder_multi);
1982 }
1983 },
1984 Some(value) => {
1985 databuilder = databuilder.with_value(value);
1986 },
1987 None => {
1988 continue
1990 }
1991 }
1992 }
1993 if !annotationdata.multiple {
1994 builder = builder.with_data_builder(databuilder);
1995 }
1996 }
1997 Ok(builder)
1998 }
1999
2000 fn annotate_textaffixes<'b>(
2002 &mut self,
2003 node: Node<'a,'b>,
2004 element_config: &XmlElementConfig,
2005 inputfile: Option<&str>,
2006 doc_num: usize,
2007 store: &mut AnnotationStore,
2008 ) -> Result<(), XmlConversionError> {
2009
2010
2011 if !element_config.annotatetextprefix.is_empty() {
2012 let mut builder = AnnotationBuilder::new().with_id(stam::generate_id("textprefix-", ""));
2013 if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), PositionType::TextPrefix)) {
2014 let begin = if let Cursor::BeginAligned(begin) = offset.begin {
2015 Some(begin)
2016 } else {
2017 None
2018 };
2019 let end = if let Cursor::BeginAligned(end) = offset.end {
2020 Some(end)
2021 } else {
2022 None
2023 };
2024 builder = self.add_annotationdata_to_builder(element_config.annotatetextprefix.iter(), builder, node.clone(), begin,end, None, inputfile, doc_num)?; if let Some(selector) = self.textselector(node, doc_num, PositionType::TextPrefix) {
2026 builder = builder.with_target(selector);
2027 if self.config.debug {
2028 eprintln!("[STAM fromxml] builder AnnotateText: {:?}", builder);
2029 }
2030 store.annotate(builder)?;
2031 } else {
2032 return Err(XmlConversionError::ConfigError("Failed to create textselector to target textprefix".into()));
2033 }
2034 }
2035 }
2036
2037 if !element_config.annotatetextsuffix.is_empty() {
2038 let mut builder = AnnotationBuilder::new().with_id(stam::generate_id("textsuffix-", ""));
2039 if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), PositionType::TextSuffix)) {
2040 let begin = if let Cursor::BeginAligned(begin) = offset.begin {
2041 Some(begin)
2042 } else {
2043 None
2044 };
2045 let end = if let Cursor::BeginAligned(end) = offset.end {
2046 Some(end)
2047 } else {
2048 None
2049 };
2050 builder = self.add_annotationdata_to_builder(element_config.annotatetextsuffix.iter(), builder, node.clone(), begin,end, None, inputfile, doc_num)?; if let Some(selector) = self.textselector(node, doc_num, PositionType::TextSuffix) {
2052 builder = builder.with_target(selector);
2053 if self.config.debug {
2054 eprintln!("[STAM fromxml] builder AnnotateText: {:?}", builder);
2055 }
2056 store.annotate(builder)?;
2057 } else {
2058 return Err(XmlConversionError::ConfigError("Failed to create textselector to target textprefix".into()));
2059 }
2060 }
2061 }
2062 Ok(())
2063 }
2064
2065 fn extract_value<'b>(&self, value: &'a toml::Value, node: Node<'a,'b>, allow_empty_value: bool, skip_if_missing: bool, valuetype: Option<&str>, begin: Option<usize>, end: Option<usize>, resource_id: Option<&str>, inputfile: Option<&str>, doc_num: usize) -> Result<Option<DataValue>, XmlConversionError>{
2067 match value {
2068 toml::Value::String(template) => {
2069 let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
2070 let compiled_template = self.template_engine.template(template.as_str()); match compiled_template.render(&context).to_string().map_err(|e|
2082 XmlConversionError::TemplateError(
2083 format!(
2084 "whilst rendering annotationdata/map template '{}' for node '{}'.{}",
2085 template,
2086 node.tag_name().name(),
2087 if self.config.debug() {
2088 format!("\nContext was {:?}.\nVariables are: {:?}", context, self.variables.get(template))
2089 } else {
2090 String::new()
2091 }
2092 ),
2093 Some(e),
2094 )
2095 ) {
2096 Ok(value) => {
2097 if !value.is_empty() || allow_empty_value {
2098 string_to_datavalue(value, valuetype).map(|v| Some(v))
2099 } else {
2100 Ok(None)
2102 }
2103 },
2104 Err(e) if !skip_if_missing => {
2105 Err(e)
2106 },
2107 Err(_) if allow_empty_value => {
2108 Ok(Some("".into()))
2109 },
2110 Err(_) => {
2111 Ok(None)
2113 }
2114 }
2115 },
2116 toml::Value::Table(map) => {
2117 let mut resultmap: BTreeMap<String,DataValue> = BTreeMap::new();
2118 for (key, value) in map.iter() {
2119 if let Some(value) = self.extract_value(value, node, false, true, None, begin, end, resource_id, inputfile, doc_num)? {
2120 resultmap.insert(key.clone(), value);
2121 }
2122 }
2123 Ok(Some(resultmap.into()))
2124 },
2125 toml::Value::Array(list) => {
2126 let mut resultlist: Vec<DataValue> = Vec::new();
2127 for value in list.iter() {
2128 if let Some(value) = self.extract_value(value, node, false, true, None, begin, end, resource_id, inputfile, doc_num)? {
2129 resultlist.push(value);
2130 }
2131 }
2132 Ok(Some(resultlist.into()))
2133 }
2134 toml::Value::Boolean(v) => Ok(Some(DataValue::Bool(*v))),
2135 toml::Value::Float(v) => Ok(Some(DataValue::Float(*v))),
2136 toml::Value::Integer(v) => Ok(Some(DataValue::Int(*v as isize))),
2137 toml::Value::Datetime(_v) => {
2138 todo!("fromxml: Datetime conversion not implemented yet");
2139 }
2140 }
2141 }
2142
2143 fn extract_value_metadata<'b>(&self, value: &'a toml::Value, context: &upon::Value, allow_empty_value: bool, skip_if_missing: bool, resource_id: Option<&str>) -> Result<Option<DataValue>, XmlConversionError>{
2145 match value {
2146 toml::Value::String(template) => {
2147 let compiled_template = self.template_engine.template(template.as_str()); match compiled_template.render(&context).to_string().map_err(|e|
2149 XmlConversionError::TemplateError(
2150 format!(
2151 "whilst rendering annotationdata/metadata template '{}' for metadata",
2152 template,
2153 ),
2154 Some(e),
2155 )
2156 ) {
2157 Ok(value) => {
2158 if !value.is_empty() || allow_empty_value {
2159 Ok(Some(value.into()))
2160 } else {
2161 Ok(None)
2163 }
2164 },
2165 Err(e) if !skip_if_missing => {
2166 Err(e)
2167 },
2168 Err(_) if allow_empty_value => {
2169 Ok(Some("".into()))
2170 },
2171 Err(_) => {
2172 Ok(None)
2174 }
2175 }
2176 },
2177 toml::Value::Table(map) => {
2178 let mut resultmap: BTreeMap<String,DataValue> = BTreeMap::new();
2179 for (key, value) in map.iter() {
2180 if let Some(value) = self.extract_value_metadata(value, context, false, true, resource_id)? {
2181 resultmap.insert(key.clone(), value);
2182 }
2183 }
2184 Ok(Some(resultmap.into()))
2185 },
2186 toml::Value::Array(list) => {
2187 let mut resultlist: Vec<DataValue> = Vec::new();
2188 for value in list.iter() {
2189 if let Some(value) = self.extract_value_metadata(value, context, false, true, resource_id)? {
2190 resultlist.push(value);
2191 }
2192 }
2193 Ok(Some(resultlist.into()))
2194 }
2195 toml::Value::Boolean(v) => Ok(Some(DataValue::Bool(*v))),
2196 toml::Value::Float(v) => Ok(Some(DataValue::Float(*v))),
2197 toml::Value::Integer(v) => Ok(Some(DataValue::Int(*v as isize))),
2198 toml::Value::Datetime(_v) => {
2199 todo!("fromxml: Datetime conversion not implemented yet");
2200 }
2201 }
2202 }
2203
2204 fn textselector<'s>(&'s self, node: Node, doc_num: usize, positiontype: PositionType) -> Option<SelectorBuilder<'s>> {
2206 let res_handle = self.resource_handle.expect("resource must be associated");
2207 if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), positiontype)) {
2208 Some(SelectorBuilder::TextSelector(
2209 BuildItem::Handle(res_handle),
2210 offset.clone(),
2211 ))
2212 } else {
2213 None
2214 }
2215 }
2216
2217 fn textselector_for_markers<'b>(
2219 &self,
2220 node: Node,
2221 doc_num: usize,
2222 store: &AnnotationStore,
2223 element_config: &'b XmlElementConfig,
2224 ) -> Option<SelectorBuilder<'b>> {
2225 let resource = store
2226 .resource(
2227 self.resource_handle
2228 .expect("resource must have been created"),
2229 )
2230 .expect("resource must exist");
2231 let mut end: Option<usize> = None;
2232 if let Some(markers) = self.markers.get(&element_config.hash()) {
2233 let mut grab = false;
2234 for (d_num, n_id) in markers.iter() {
2235 if grab {
2236 end = self.positionmap.get(&(*d_num, *n_id, PositionType::Body)).map(|offset| {
2238 offset
2239 .begin
2240 .try_into()
2241 .expect("begin cursor must be beginaligned")
2242 });
2243 break;
2244 }
2245 if doc_num == *d_num && *n_id == node.id() {
2246 grab = true;
2248 }
2249 }
2250 };
2251 if end.is_none() {
2252 end = Some(resource.textlen());
2254 }
2255 if let (Some(offset), Some(end)) = (self.positionmap.get(&(doc_num, node.id(), PositionType::Body)), end) {
2256 Some(SelectorBuilder::TextSelector(
2257 BuildItem::Handle(self.resource_handle.unwrap()),
2258 Offset::simple(
2259 offset
2260 .begin
2261 .try_into()
2262 .expect("begin cursor must be beginaligned"),
2263 end,
2264 ),
2265 ))
2266 } else {
2267 None
2268 }
2269 }
2270
2271 fn set_global_context(&mut self) {
2272 self.global_context
2273 .insert("context".into(), upon::Value::Map(self.config.context.iter().map(|(k,v)| (k.clone(), map_value(v))).collect()));
2274 self.global_context
2275 .insert("namespaces".into(), self.config.namespaces.clone().into());
2276 self.global_context
2277 .insert("default_set".into(), self.config.default_set.clone().into());
2278 }
2279
2280 fn render_template<'input, 't>(
2281 &self,
2282 template: &'t str,
2283 node: &Node<'a, 'input>,
2284 begin: Option<usize>,
2285 end: Option<usize>,
2286 resource: Option<&str>,
2287 inputfile: Option<&str>,
2288 doc_num: usize,
2289 ) -> Result<Cow<'t, str>, XmlConversionError> {
2290 if template.chars().any(|c| c == '{') {
2291 let compiled_template = self.template_engine.template(template);
2293 let context = self.context_for_node(&node, begin, end, template, resource, inputfile, doc_num);
2294 let result = compiled_template.render(context).to_string()?;
2295 Ok(Cow::Owned(result))
2296 } else {
2297 Ok(Cow::Borrowed(template))
2299 }
2300 }
2301
2302 fn context_for_node<'input>(
2303 &self,
2304 node: &Node<'a, 'input>,
2305 begin: Option<usize>,
2306 end: Option<usize>,
2307 template: &str,
2308 resource: Option<&str>,
2309 inputfile: Option<&str>,
2310 doc_num: usize,
2311 ) -> upon::Value {
2312 let mut context = self.global_context.clone();
2313 let length = if let (Some(begin), Some(end)) = (begin, end) {
2314 Some(end - begin)
2315 } else {
2316 None
2317 };
2318 context.insert("localname".into(), node.tag_name().name().into());
2319 context.insert("name".into(), self.get_node_name_for_template(node).into());
2321 if let Some(namespace) = node.tag_name().namespace() {
2322 context.insert("namespace".into(), namespace.into());
2324 }
2325
2326 if let Some(begin) = begin {
2328 context.insert("begin".into(), upon::Value::Integer(begin as i64));
2329 }
2330 if let Some(end) = end {
2331 context.insert("end".into(), upon::Value::Integer(end as i64));
2332 }
2333 if let Some(length) = length {
2334 context.insert("length".into(), upon::Value::Integer(length as i64));
2335 }
2336 if let Some(resource) = resource {
2337 context.insert("resource".into(), resource.into());
2339 }
2340 if let Some(inputfile) = inputfile {
2341 context.insert("inputfile".into(), inputfile.into());
2343 }
2344 context.insert("doc_num".into(), upon::Value::Integer(doc_num as i64));
2346
2347 if let Some(vars) = self.variables.get(template) {
2348 for var in vars {
2349 let mut encodedvar = String::new();
2350 if let Some(value) = self.context_for_var(node, var, &mut encodedvar, false) {
2351 if self.config.debug() {
2352 eprintln!(
2353 "[STAM fromxml] Set context variable for template '{}' for node '{}': {}={:?} (encodedvar={})",
2354 template,
2355 node.tag_name().name(),
2356 var,
2357 value,
2358 encodedvar
2359 );
2360 }
2361 if value != upon::Value::None {
2362 context.insert(encodedvar, value);
2363 }
2364 } else if self.config.debug() {
2365 eprintln!(
2366 "[STAM fromxml] Missed context variable for template '{}' for node '{}': {}",
2367 template,
2368 node.tag_name().name(),
2369 var
2370 );
2371 }
2372 }
2373 }
2374 upon::Value::Map(context)
2375 }
2376
2377 fn context_for_var<'input>(
2381 &self,
2382 node: &Node<'a, 'input>,
2383 var: &str,
2384 path: &mut String,
2385 mut return_all_matches: bool,
2386 ) -> Option<upon::Value> {
2387
2388 let first = path.is_empty();
2390
2391 let var = if var.starts_with("?.$$") {
2392 if first {
2393 path.push_str("?.ELEMENTS_");
2394 return_all_matches = true;
2395 if self.config.debug {
2396 eprintln!("[STAM fromxml] will return all matches for {}", var);
2397 }
2398 };
2399 &var[4..]
2400 } else if var.starts_with("?.$") {
2401 if first {
2402 path.push_str("?.ELEMENT_");
2403 };
2404 &var[3..]
2405 } else if var.starts_with("$$") {
2406 if first {
2407 path.push_str("ELEMENTS_");
2408 return_all_matches = true;
2409 if self.config.debug {
2410 eprintln!("[STAM fromxml] will return all matches for {}", var);
2411 }
2412 };
2413 &var[2..]
2414 } else if var.starts_with("$") {
2415 if first {
2416 path.push_str("ELEMENT_");
2417 };
2418 &var[1..]
2419 } else if var.starts_with("?.@") {
2420 if first {
2421 path.push_str("?.");
2422 };
2423 &var[2..]
2424 } else {
2425 var
2426 };
2427
2428 if !first && !var.is_empty() && !path.ends_with("ELEMENT_") && !path.ends_with("ELEMENTS_"){
2429 path.push_str("_IN_");
2430 }
2431
2432 let (component, remainder) = var.split_once("/").unwrap_or((var,""));
2434 if component.is_empty() {
2436 if first && !remainder.is_empty() {
2437 let mut n = node.clone();
2439 while let Some(parentnode) = n.parent_element() {
2441 n = parentnode;
2442 }
2443 let (rootcomponent, remainder) = remainder.split_once("/").unwrap_or((remainder,""));
2445 let (prefix, localname) = if let Some(pos) = rootcomponent.find(":") {
2446 (Some(&rootcomponent[0..pos]), &rootcomponent[pos+1..])
2447 } else {
2448 (None, rootcomponent)
2449 };
2450 if localname != n.tag_name().name() && localname != "*" {
2452 None
2453 } else {
2454 if let Some(prefix) = prefix {
2455 path.push_str(prefix);
2456 path.push_str("__");
2457 }
2458 path.push_str(localname);
2459 self.context_for_var(&n, remainder, path, return_all_matches)
2460 }
2461 } else {
2462 Some(recursive_text(node).into())
2465 }
2466 } else if component.starts_with("@"){
2467 if let Some(pos) = component.find(":") {
2468 let prefix = &component[1..pos];
2469 if let Some(ns) = self.config.namespaces.get(prefix) {
2470 let var = &component[pos+1..];
2471 path.push_str("ATTRIB_");
2472 path.push_str(prefix);
2473 path.push_str("__");
2474 path.push_str(var);
2475 Some(
2476 node.attribute((ns.as_str(),var)).into()
2477 )
2478 } else {
2479 None
2480 }
2481 } else {
2482 let var = &component[1..];
2483 path.push_str("ATTRIB_");
2484 path.push_str(var);
2485 Some(
2486 node.attribute(var).into()
2487 )
2488 }
2489 } else if component == ".." {
2490 if let Some(parentnode) = node.parent_element().as_ref() {
2491 path.push_str("PARENT");
2493 self.context_for_var(parentnode, remainder, path, return_all_matches)
2494 } else {
2495 None
2496 }
2497 } else if component == "." {
2498 path.push_str("THIS");
2499 if !remainder.is_empty() {
2500 self.context_for_var(node, remainder, path, return_all_matches)
2502 } else {
2503 Some(recursive_text(node).into())
2504 }
2505 } else {
2506 let (prefix, localname) = if let Some(pos) = component.find(":") {
2507 (Some(&component[0..pos]), &component[pos+1..])
2508 } else {
2509 (None, component)
2510 };
2511 let localname_with_condition = localname;
2512 let (localname, condition_str, condition) = self.extract_condition(localname_with_condition); let mut multiple_value_buffer: Vec<upon::Value> = Vec::new(); let mut final_path: String = String::new(); for child in node.children() {
2517 if child.is_element() {
2518 let namedata = child.tag_name();
2519 let mut child_matches = if let Some(namespace) = namedata.namespace() {
2520 if let Some(foundprefix) = self.prefixes.get(namespace) {
2521 Some(foundprefix.as_str()) == prefix && localname == namedata.name()
2522 } else {
2523 false
2524 }
2525 } else {
2526 namedata.name() == localname
2527 };
2528 if child_matches {
2529 if let Some((attribname, negate, attribvalue)) = condition {
2531 if let Some(pos) = attribname.find(":") {
2533 let prefix = &attribname[0..pos];
2534 if let Some(ns) = self.config.namespaces.get(prefix) {
2535 let attribname = &attribname[pos+1..];
2536 if let Some(value) = child.attribute((ns.as_str(),attribname)) {
2537 if !negate && attribvalue != Some(value) {
2538 child_matches = false;
2539 } else if negate && attribvalue == Some(value) {
2540 child_matches = false;
2541 }
2542 } else {
2543 child_matches = false;
2544 }
2545 } else {
2546 child_matches = false;
2547 }
2548 } else {
2549 if let Some(value) = child.attribute(attribname) {
2550 if !negate && attribvalue != Some(value) {
2551 child_matches = false;
2552 } else if negate && attribvalue == Some(value) {
2553 child_matches = false;
2554 }
2555 } else {
2556 child_matches = false;
2557 }
2558 }
2559 }
2560 if !child_matches && self.config.debug {
2561 eprintln!("[STAM fromxml] candidate node does not meet condition: {}", localname_with_condition);
2562 }
2563 }
2565 if child_matches {
2566 let prevpathlen = path.len();
2567 if let Some(prefix) = prefix {
2569 path.push_str(prefix);
2570 path.push_str("__");
2571 }
2572 path.push_str(localname);
2573 if condition.is_some() {
2574 let mut hasher = DefaultHasher::new();
2576 condition_str.hash(&mut hasher);
2577 let h = hasher.finish();
2578 path.push_str(&format!("_COND{}_", h));
2579 }
2580 if let Some(value) = self.context_for_var(&child, remainder, path, return_all_matches) {
2581 if return_all_matches {
2583 if let upon::Value::List(v) = value {
2584 multiple_value_buffer.extend(v.into_iter());
2585 } else {
2586 multiple_value_buffer.push(value);
2587 }
2588 if final_path.is_empty() {
2589 final_path = path.clone();
2590 }
2591 } else {
2593 return Some(value);
2595 }
2596 }
2597 path.truncate(prevpathlen);
2599 }
2600 }
2601 }
2602 if !multiple_value_buffer.is_empty() {
2603 if self.config.debug {
2605 eprintln!("[STAM fromxml] returning multiple matches of {} as list", var);
2606 }
2607 *path = final_path;
2609 Some(multiple_value_buffer.into())
2610 } else {
2611 if self.config.debug {
2613 eprintln!("[STAM fromxml] returning with no match found for {} in {}", var, node.tag_name().name());
2614 }
2615 None
2616 }
2617 }
2618 }
2619
2620 fn extract_condition<'b>(&self, localname: &'b str) -> (&'b str, &'b str, Option<(&'b str, bool, Option<&'b str>)>) { if localname.ends_with("]") {
2623 if let Some(pos) = localname.find("[") {
2624 let condition = &localname[pos+1..localname.len()-1];
2625 let (mut attrib, negation, attribvalue) = if let Some(pos) = condition.find("=") {
2626 let attrib = condition[0..pos].trim();
2627 let value = condition[pos+1..].trim();
2628 let value = &value[1..value.len() - 1]; if attrib.ends_with('!') {
2630 (attrib[..attrib.len() - 1].trim(), true, Some(value))
2632 } else {
2633 (attrib.trim(), false, Some(value))
2634 }
2635 } else {
2636 (condition, false, None)
2637 };
2638 if attrib.starts_with('@') {
2639 attrib = &attrib[1..];
2641 }
2642 return (&localname[..pos], condition, Some((attrib, negation,attribvalue )) );
2643 }
2644 }
2645 (localname, "", None)
2646 }
2647
2648
2649 fn get_node_name_for_template<'b>(&self, node: &'b Node) -> Cow<'b,str> {
2650 let extended_name = node.tag_name();
2651 match (extended_name.namespace(), extended_name.name()) {
2652 (Some(namespace), tagname) => {
2653 if let Some(prefix) = self.prefixes.get(namespace) {
2654 Cow::Owned(format!("{}__{}", prefix, tagname))
2655 } else {
2656 Cow::Borrowed(tagname)
2657 }
2658 }
2659 (None, tagname) => Cow::Borrowed(tagname),
2660 }
2661 }
2662
2663 fn get_node_name_for_xpath<'b>(&self, node: &'b Node) -> Cow<'b,str> {
2664 let extended_name = node.tag_name();
2665 match (extended_name.namespace(), extended_name.name()) {
2666 (Some(namespace), tagname) => {
2667 if let Some(prefix) = self.prefixes.get(namespace) {
2668 Cow::Owned(format!("{}:{}", prefix, tagname))
2669 } else {
2670 Cow::Borrowed(tagname)
2671 }
2672 }
2673 (None, tagname) => Cow::Borrowed(tagname),
2674 }
2675 }
2676
2677
2678 fn precompile(&mut self, template: &'a str) -> Cow<'a,str> {
2679 let mut replacement = String::new();
2680 let mut variables: BTreeSet<&'a str> = BTreeSet::new();
2681 let mut begin = 0;
2682 let mut end = 0;
2683 for i in 0..template.len() {
2684 let slice = &template[i..];
2685 if slice.starts_with("{{") || slice.starts_with("{%") {
2686 begin = i;
2687 } else if slice.starts_with("}}") || slice.starts_with("%}") {
2688 if end < begin+2 {
2689 replacement.push_str(&template[end..begin+2]);
2690 }
2691 let inner = &template[begin+2..i]; replacement.push_str(&self.precompile_inblock(inner, &mut variables));
2693 end = i;
2694 }
2695 }
2696 if end > 0 {
2697 replacement.push_str(&template[end..]);
2698 }
2699 self.variables.insert(template.into(), variables);
2700 if !replacement.is_empty() {
2703 Cow::Owned(replacement)
2704 } else {
2705 Cow::Borrowed(template)
2706 }
2707 }
2708
2709 fn precompile_inblock<'s>(&self, s: &'s str, vars: &mut BTreeSet<&'s str>) -> Cow<'s,str> {
2710 let mut quoted = false;
2711 let mut var = false;
2712 let mut begin = 0;
2713 let mut end = 0;
2714 let mut replacement = String::new();
2715 let mut in_condition = false;
2716 for (i,c) in s.char_indices() {
2717 if in_condition && c != ']' {
2718 continue;
2719 }
2720 if c == '"' {
2721 quoted = !quoted;
2722 } else if !quoted {
2723 if !var && (c == '@' || c == '$') {
2724 var = true;
2726 begin = i;
2727 } else if var && c == '[' {
2728 in_condition = true;
2729 } else if var && in_condition && c == ']' {
2730 in_condition = false;
2732 } else if var && in_condition {
2733 continue;
2735 } else if var && (!c.is_alphanumeric() && c != '$' && c != '.' && c != '/' && c != '_' && c != ':' && c != '@') {
2736 if end < begin {
2738 replacement.push_str(&s[end..begin]);
2739 }
2740 let varname = &s[begin..i];
2741 vars.insert(varname);
2742 let replacement_var = self.precompile_name(varname);
2743 replacement += &replacement_var;
2744 end = i;
2745 var = false;
2746 }
2747 }
2748 }
2749 if end > 0 {
2750 replacement.push_str(&s[end..]);
2751 }
2752 if var {
2753 let varname = &s[begin..];
2755 vars.insert(varname);
2756 let replacement_var = self.precompile_name(varname);
2757 replacement += &replacement_var;
2758 }
2759 if !replacement.is_empty() {
2760 Cow::Owned(replacement)
2762 } else {
2763 Cow::Borrowed(s)
2764 }
2765 }
2766
2767 fn precompile_name(&self, s: &str) -> String {
2769 let mut replacement = String::new();
2770 let mut begincondition = None;
2771 let mut skip = 0;
2772 for (i,c) in s.char_indices() {
2773 if begincondition.is_some() && c != ']' {
2774 continue;
2775 } else if skip > 0 {
2776 skip -= 1;
2777 continue;
2778 }
2779 if c == '$' {
2780 let slice = &s[i..];
2781 if slice.starts_with("$$..") {
2782 replacement.push_str("ELEMENTS_PARENT");
2783 skip = 3;
2784 } else if slice.starts_with("$$.") {
2785 replacement.push_str("ELEMENTS_THIS");
2786 skip = 2;
2787 } else if slice.starts_with("$$/") {
2788 replacement.push_str("ELEMENTS_");
2789 skip = 2;
2790 } else if slice.starts_with("$$") {
2791 replacement.push_str("ELEMENTS_");
2792 skip = 1;
2793 } else if slice.starts_with("$..") {
2794 replacement.push_str("ELEMENT_PARENT");
2795 skip = 2;
2796 } else if slice.starts_with("$.") {
2797 replacement.push_str("ELEMENT_THIS");
2798 skip = 1;
2799 } else if slice.starts_with("$/") {
2800 replacement.push_str("ELEMENT_");
2801 skip = 1;
2802 } else {
2803 replacement.push_str("ELEMENT_");
2804 }
2805 } else if c == '@' {
2806 replacement.push_str("ATTRIB_");
2807 } else if c == '/' {
2808 replacement.push_str("_IN_");
2809 } else if c == ':' {
2810 replacement.push_str("__");
2811 } else if c == '[' {
2812 begincondition = Some(i+1);
2813 } else if c == ']' {
2814 if let Some(begin) = begincondition {
2816 let mut hasher = DefaultHasher::new();
2817 let _ = &s[begin..i].hash(&mut hasher);
2818 let h = hasher.finish();
2819 replacement.push_str(&format!("_COND{}_", h));
2820 }
2821 begincondition = None;
2822 } else {
2823 replacement.push(c);
2824 }
2825 }
2826 replacement
2828 }
2829
    /// Creates one annotation in `store` for every entry in the `metadata` section of
    /// the configuration.
    ///
    /// For each entry the annotation id and every data item (set, key, value) are
    /// rendered from their templates against the global context, extended with
    /// `resource` when the resource has an id. The annotation targets either the whole
    /// text of the resource or the resource itself, depending on the entry's
    /// annotation handling.
    ///
    /// # Errors
    ///
    /// Returns a template error when a template fails to render (for key templates
    /// only when `skip_if_missing` is not set), when a key renders empty without
    /// `skip_if_missing`, and propagates errors from value extraction and from adding
    /// the annotation to the store.
    ///
    /// # Panics
    ///
    /// Panics when the entry's annotation handling is anything other than
    /// `TextSelector`, `ResourceSelector`, `None` or `Unspecified`, when
    /// `TextSelector` is requested without a resource handle, or when the resource
    /// handle does not resolve in `store`.
    fn add_metadata(&self, store: &mut AnnotationStore) -> Result<(), XmlConversionError> {
        for metadata in self.config.metadata.iter() {
            let mut builder = AnnotationBuilder::new();

            let resource_id = if let Some(resource_handle) = self.resource_handle {
                store.resource(resource_handle).unwrap().id()
            } else {
                None
            };

            // Metadata templates only see the global context plus the resource id.
            let mut context = self.global_context.clone();
            if let Some(resource_id) = resource_id {
                context.insert("resource".into(), resource_id.into());
            }

            if let Some(template) = &metadata.id {
                let compiled_template = self.template_engine.template(template.as_str());
                let id = compiled_template.render(&context).to_string().map_err(|e|
                        XmlConversionError::TemplateError(
                            format!(
                                "whilst rendering metadata id template '{}'",
                                template,
                            ),
                            Some(e),
                        )
                    )?;
                // An id that renders empty leaves the annotation without an explicit id.
                if !id.is_empty() {
                    builder = builder.with_id(id);
                }
            }

            for annotationdata in metadata.annotationdata.iter() {
                let mut databuilder = AnnotationDataBuilder::new();
                if let Some(template) = &annotationdata.set {
                    let compiled_template = self.template_engine.template(template.as_str());
                    let dataset = compiled_template.render(&context).to_string().map_err(|e|
                            XmlConversionError::TemplateError(
                                format!(
                                    "whilst rendering annotationdata/dataset template '{}' for metadata",
                                    template,
                                ),
                                Some(e),
                            )
                        )?;
                    if !dataset.is_empty() {
                        databuilder = databuilder.with_dataset(dataset.into())
                    }
                } else {
                    // No set configured: fall back to the configured default set.
                    databuilder =
                        databuilder.with_dataset(self.config.default_set.as_str().into());
                }
                if let Some(template) = &annotationdata.key {
                    let compiled_template = self.template_engine.template(template.as_str());
                    match compiled_template.render(&context).to_string().map_err(|e|
                            XmlConversionError::TemplateError(
                                format!(
                                    "whilst rendering annotationdata/key template '{}' for metadata",
                                    template,
                                ),
                                Some(e),
                            )
                        )  {
                        Ok(key) if !key.is_empty() =>
                            databuilder = databuilder.with_key(key.into()) ,
                        // An empty key is an error unless the item may be skipped.
                        Ok(_) if !annotationdata.skip_if_missing => {
                            return Err(XmlConversionError::TemplateError(
                                format!(
                                    "whilst rendering annotationdata/key template '{}' metadata",
                                    template,
                                ),
                                None
                            ));
                        },
                        Err(e) if !annotationdata.skip_if_missing => {
                            return Err(e)
                        },
                        // skip_if_missing is set: drop this data item and move on.
                        _ => {
                            continue
                        }
                    }
                }
                if let Some(value) = &annotationdata.value {
                    match self.extract_value_metadata(value, &upon::Value::Map(context.clone()), annotationdata.allow_empty_value, annotationdata.skip_if_missing, resource_id.as_deref())? {
                        Some(value) => {
                            databuilder = databuilder.with_value(value);
                        },
                        // No value was produced: drop this data item.
                        None =>  {
                            continue
                        }
                    }
                }
                builder = builder.with_data_builder(databuilder);
            }



            match metadata.annotation {
                // Target the whole text of the resource.
                XmlAnnotationHandling::TextSelector => {
                    builder = builder.with_target(SelectorBuilder::TextSelector(BuildItem::Handle(self.resource_handle.expect("resource must have handle")), Offset::whole()));
                    if self.config.debug {
                        eprintln!("[STAM fromxml]   builder AnnotateText: {:?}", builder);
                    }
                    store.annotate(builder)?;
                }
                // Target the resource as such; this is also the fallback when no
                // handling was specified.
                XmlAnnotationHandling::ResourceSelector  | XmlAnnotationHandling::None | XmlAnnotationHandling::Unspecified => {
                    builder = builder.with_target(SelectorBuilder::ResourceSelector(
                        self.resource_handle.into(),
                    ));
                    if self.config.debug {
                        eprintln!("[STAM fromxml]   builder AnnotateResource: {:?}", builder);
                    }
                    store.annotate(builder)?;
                }
                _ => panic!(
                    "Invalid annotationhandling for metadata: {:?}",
                    metadata.annotation
                ),
            }
        }
        Ok(())
    }
2956}
2957
2958
2959
2960fn recursive_text(node: &Node) -> String {
2962 let mut s = String::new();
2963 for child in node.children() {
2964 if child.is_text() {
2965 s += child.text().expect("should have text");
2966 } else if child.is_element() {
2967 s += &recursive_text(&child);
2968 }
2969 }
2970 s
2971}
2972
/// Template filter: returns `s` with its first character converted to uppercase.
///
/// The rest of the string is left untouched. The uppercase form of the first
/// character may consist of more than one character.
fn filter_capitalize(s: &str) -> String {
    let mut rest = s.chars();
    let Some(first) = rest.next() else {
        return String::new();
    };
    let mut capitalized = String::with_capacity(s.len());
    capitalized.extend(first.to_uppercase());
    capitalized.push_str(rest.as_str());
    capitalized
}
2985
2986fn filter_gt(a: &upon::Value, b: &upon::Value) -> bool {
2987 match (a, b) {
2988 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a > *b,
2989 (upon::Value::Float(a), upon::Value::Float(b)) => *a > *b,
2990 (upon::Value::String(a), upon::Value::String(b)) => *a > *b,
2991 _ => false,
2992 }
2993}
2994
2995fn filter_lt(a: &upon::Value, b: &upon::Value) -> bool {
2996 match (a, b) {
2997 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a < *b,
2998 (upon::Value::Float(a), upon::Value::Float(b)) => *a < *b,
2999 (upon::Value::String(a), upon::Value::String(b)) => *a < *b,
3000 _ => false,
3001 }
3002}
3003
3004fn filter_gte(a: &upon::Value, b: &upon::Value) -> bool {
3005 match (a, b) {
3006 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a >= *b,
3007 (upon::Value::Float(a), upon::Value::Float(b)) => *a >= *b,
3008 (upon::Value::String(a), upon::Value::String(b)) => *a >= *b,
3009 _ => false,
3010 }
3011}
3012
3013fn filter_lte(a: &upon::Value, b: &upon::Value) -> bool {
3014 match (a, b) {
3015 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a <= *b,
3016 (upon::Value::Float(a), upon::Value::Float(b)) => *a <= *b,
3017 (upon::Value::String(a), upon::Value::String(b)) => *a <= *b,
3018 _ => false,
3019 }
3020}
3021
3022fn filter_add(a: &upon::Value, b: &upon::Value) -> upon::Value {
3023 match (a, b) {
3024 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a + b),
3025 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a + b),
3026 (upon::Value::String(a), upon::Value::String(b)) => upon::Value::String(a.clone() + b),
3027 _ => upon::Value::None,
3028 }
3029}
3030
3031fn filter_sub(a: &upon::Value, b: &upon::Value) -> upon::Value {
3032 match (a, b) {
3033 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a - b),
3034 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a - b),
3035 _ => upon::Value::None,
3036 }
3037}
3038
3039fn filter_mul(a: &upon::Value, b: &upon::Value) -> upon::Value {
3040 match (a, b) {
3041 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a * b),
3042 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a * b),
3043 _ => upon::Value::None,
3044 }
3045}
3046
3047fn filter_div(a: &upon::Value, b: &upon::Value) -> upon::Value {
3048 match (a, b) {
3049 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a / b),
3050 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a / b),
3051 _ => upon::Value::None,
3052 }
3053}
3054
3055
3056fn map_value(value: &toml::Value) -> upon::Value {
3058 match value {
3059 toml::Value::String(s) => upon::Value::String(s.clone()),
3060 toml::Value::Integer(i) => upon::Value::Integer(*i),
3061 toml::Value::Float(i) => upon::Value::Float(*i),
3062 toml::Value::Boolean(v) => upon::Value::Bool(*v),
3063 toml::Value::Datetime(s) => upon::Value::String(s.to_string()),
3064 toml::Value::Array(v) => upon::Value::List(v.iter().map(|i| map_value(i)).collect()),
3065 toml::Value::Table(v) => upon::Value::Map(v.iter().map(|(k,i)| (k.clone(),map_value(i))).collect()),
3066 }
3067}
3068
3069#[inline]
3071fn string_to_datavalue(value: String, valuetype: Option<&str>) -> Result<DataValue,XmlConversionError> {
3072 match valuetype {
3073 Some("str") | Some("string") => Ok(DataValue::String(value)),
3074 Some("int") => {
3075 if let Ok(value) = value.parse::<isize>() {
3076 Ok(DataValue::Int(value))
3077 } else {
3078 Err(XmlConversionError::TemplateError(format!("Unable to interpret value as integer: {}", value), None))
3079 }
3080 },
3081 Some("float") => {
3082 if let Ok(value) = value.parse::<f64>() {
3083 Ok(DataValue::Float(value))
3084 } else {
3085 Err(XmlConversionError::TemplateError(format!("Unable to interpret value as integer: {}", value), None))
3086 }
3087 },
3088 Some("bool") => match value.as_str() {
3089 "yes" | "true" | "enabled" | "on" | "1" | "active" => Ok(DataValue::Bool(true)),
3090 _ => Ok(DataValue::Bool(false))
3091 },
3092 Some(x) => {
3093 Err(XmlConversionError::TemplateError(format!("Invalid valuetype: {}", x), None))
3094 }
3095 None => {
3096 if let Ok(value) = value.parse::<isize>() {
3098 Ok(DataValue::Int(value))
3099 } else if let Ok(value) = value.parse::<f64>() {
3100 Ok(DataValue::Float(value))
3101 } else if value.starts_with("(list) [ ") && value.ends_with(" ]") {
3102 if let Ok(serde_json::Value::Array(values)) = serde_json::from_str(&value[6..]) {
3104 Ok(DataValue::List(values.into_iter().map(|v| {
3105 match v {
3106 serde_json::Value::String(s) => DataValue::String(s),
3107 serde_json::Value::Number(n) => if let Some(n) = n.as_i64() {
3108 DataValue::Int(n as isize)
3109 } else if let Some(n) = n.as_f64() {
3110 DataValue::Float(n)
3111 } else {
3112 unreachable!("number should always be either int or float")
3113 },
3114 serde_json::Value::Bool(b) => DataValue::Bool(b),
3115 _ => DataValue::Null, }
3117 }).collect()))
3118 } else {
3119 Err(XmlConversionError::TemplateError(format!("Unable to deserialize list value: {}", value), None))
3120 }
3121 } else {
3122 Ok(value.into())
3123 }
3124 }
3125 }
3126}
3127
3128fn string_to_templatevalue(value: String) -> upon::Value {
3129 if let Ok(value) = value.parse::<i64>() {
3130 upon::Value::Integer(value)
3131 } else if let Ok(value) = value.parse::<f64>() {
3132 upon::Value::Float(value)
3133 } else {
3134 upon::Value::String(value)
3135 }
3136}
3137
3138fn value_formatter(f: &mut upon::fmt::Formatter<'_>, value: &upon::Value) -> upon::fmt::Result {
3141 match value {
3142 upon::Value::List(vs) => {
3143 f.write_str("(list) [ ")?;
3144 for (i, v) in vs.iter().enumerate() {
3145 if i > 0 {
3146 f.write_str(", ")?;
3147 }
3148 if let upon::Value::String(s) = v {
3149 write!(f, "\"{}\"", s.replace("\"","\\\"").replace("\n"," ").split_whitespace().collect::<Vec<_>>().join(" "))?;
3150 } else {
3151 upon::fmt::default(f, v)?;
3152 f.write_char('"')?;
3153 }
3154 }
3155 f.write_str(" ]")?;
3156 }
3157 v => upon::fmt::default(f, v)?, };
3159 Ok(())
3160}
3161
/// Configuration of an external command that can be run on a template value.
#[derive(Clone,Debug,Deserialize)]
struct ExternalFilter {
    /// Name of the filter; it is reported in the messages emitted when the command
    /// fails. Presumably also the name under which the filter is available in
    /// templates (TODO confirm against the code that registers filters).
    name: String,

    /// The program to execute.
    command: String,

    /// Arguments passed to the program. An argument that is exactly `{{value}}`,
    /// `{{ value }}` or `$value` is replaced by the input value.
    args: Vec<String>
}
3173
3174impl ExternalFilter {
3175 fn run(&self, input_value: &upon::Value) -> upon::Value {
3177 let process = Command::new(self.command.as_str()).args(
3178 self.args.iter().map(|x| if x == "{{value}}" || x == "{{ value }}" || x == "$value" {
3180 match input_value {
3181 upon::Value::String(s) => s.clone(),
3182 upon::Value::Integer(d) => format!("{}",d),
3183 upon::Value::Float(d) => format!("{}",d),
3184 upon::Value::Bool(d) => format!("{}",d),
3185 upon::Value::None => String::new(),
3186 _ => panic!("Lists and maps are not supported to be passed as parameter to external filters yet!"),
3187 }
3188 } else {
3189 x.clone() })
3191 ).stdin(Stdio::piped()).stdout(Stdio::piped()).spawn();
3192
3193
3194 if let Ok(mut process) = process {
3195 {
3196 let mut outstdin = process.stdin.take().expect("unable to open stdin for external filter");
3197 let mut writer = BufWriter::new(&mut outstdin);
3198 match input_value {
3199 upon::Value::String(s) => writer.write(s.as_bytes()),
3200 upon::Value::Integer(d) => writer.write(format!("{}",d).as_bytes()),
3201 upon::Value::Float(d) => writer.write(format!("{}",d).as_bytes()),
3202 upon::Value::Bool(d) => writer.write(format!("{}",d).as_bytes()),
3203 upon::Value::None => writer.write(&[]),
3204 _ => panic!("Lists and maps are not supported to be passed as input to external filters yet!"),
3205 }.expect("Writing to stdin for external filter failed!");
3206 }
3208 let output = process.wait_with_output().expect("External filter wasn't running");
3209 if !output.status.success() {
3210 panic!("External filter {} failed ({:?})", self.name, output.status.code());
3211 }
3212 if let Ok(s) = String::from_utf8(output.stdout) {
3213 return string_to_templatevalue(s);
3214 } else {
3215 panic!("External filter {} produced invalid UTF-8!", self.name);
3216 }
3217 }
3218 panic!("External filter {} failed!", self.name);
3219 }
3220}
3221
3222#[cfg(test)]
3223mod tests {
3224 use super::*;
3225 const XMLSMALLEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3228<head><title>test</title></head><body><h1>TEST</h1><p xml:id="p1" n="001">This is a <em xml:id="emphasis" style="color:green">test</em>.</p></body></html>"#;
3229
3230 const XMLEXAMPLE: &'static str = r#"<!DOCTYPE entities[<!ENTITY nbsp " ">]>
3231<html xmlns="http://www.w3.org/1999/xhtml" xmlns:my="http://example.com">
3232<head>
3233 <title>Test</title>
3234 <meta name="author" content="proycon" />
3235</head>
3236<body>
3237 <h1>Header</h1>
3238
3239 <p xml:id="par1">
3240 <span xml:id="sen1">This is a sentence.</span>
3241 <span xml:id="sen2">This is the second sentence.</span>
3242 </p>
3243 <p xml:id="par2">
3244 <strong>This</strong> is the <em>second</em> paragraph.
3245 It has a <strong>bold</strong> word and one in <em>italics</em>.<br/>
3246 Let's highlight stress in the following word: <span my:stress="secondary">re</span>pu<span my:stress="primary">ta</span>tion.
3247 </p>
3248 <p xml:space="preserve"><![CDATA[This third
3249paragraph consists
3250of CDATA and is configured to preserve whitespace, and weird &entities; ]]></p>
3251
3252 <h2>Subsection</h2>
3253
3254 <p>
3255 Have some fruits:<br/>
3256 <ul xml:id="list1" class="fruits">
3257 <li xml:id="fruit1">apple</li>
3258 <li xml:id="fruit2">banana</li>
3259 <li xml:id="fruit3">melon</li>
3260 </ul>
3261 </p>
3262
3263 Some lingering text outside of any confines...
3264</body>
3265</html>"#;
3266
3267 const XMLEXAMPLE_TEXTOUTPUT: &'static str = "Header\n\nThis is a sentence. This is the second sentence.\n\nThis is the second paragraph. It has a bold word and one in italics.\nLet's highlight stress in the following word: reputation.\n\nThis third\nparagraph consists\nof CDATA and is configured to preserve whitespace, and weird &entities; \nSubsection\n\nHave some fruits:\n* apple\n* banana\n* melon\n\nSome lingering text outside of any confines...";
3268
3269 const XMLTEISPACE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3271<body><space dim="vertical" unit="lines" quantity="3" /></body></html>"#;
3272
/// TOML conversion configuration shared by the tests in this module.
///
/// It selects collapsing whitespace handling and `urn:stam-fromhtml` as the default set,
/// declares four namespace prefixes, two base elements (`common` and `text`), fifteen
/// `[[elements]]` rules and a single `[[metadata]]` entry.
// NOTE(review): the `//html:head//html:meta` rule guards on `?.@name` but renders the key
// from `{{ name }}`; `{{ @name }}` may have been intended. Confirm before changing.
const CONF: &str = r#"#default whitespace handling (Collapse or Preserve)
whitespace = "Collapse"
default_set = "urn:stam-fromhtml"

[namespaces]
#this defines the namespace prefixes you can use in this configuration
xml = "http://www.w3.org/XML/1998/namespace"
html = "http://www.w3.org/1999/xhtml"
xsd = "http://www.w3.org/2001/XMLSchema"
xlink = "http://www.w3.org/1999/xlink"

# elements and attributes are matched in reverse-order, so put more generic statements before more specific ones

#Define some base elements that we reuse later for actual elements (prevents unnecessary repetition)
[baseelements.common]
id = "{% if ?.@xml:id %}{{ @xml:id }}{% endif %}"

 [[baseelements.common.annotationdata]]
 key = "type"
 value = "{{ localname }}"

 [[baseelements.common.annotationdata]]
 key = "lang"
 value = "{{ @xml:lang }}"
 skip_if_missing = true

 [[baseelements.common.annotationdata]]
 key = "n"
 value = "{{ @n }}"
 skip_if_missing = true
 valuetype = "int"

 [[baseelements.common.annotationdata]]
 key = "nstring"
 value = "{{ @n }}"
 skip_if_missing = true
 valuetype = "string"

 [[baseelements.common.annotationdata]]
 key = "style"
 value = "{{ @style }}"
 skip_if_missing = true

 [[baseelements.common.annotationdata]]
 key = "class"
 value = "{{ @class }}"
 skip_if_missing = true

 [[baseelements.common.annotationdata]]
 key = "src"
 value = "{{ @src }}"
 skip_if_missing = true

[baseelements.text]
text = true


[[elements]]
base = [ "text", "common" ]
path = "*"
text = true
annotation = "TextSelector"

# Pass through the following elements without mapping to text
[[elements]]
base = [ "common" ]
path = "//html:head"

[[elements]]
base = [ "common" ]
path = "//html:head//*"

# Map metadata like <meta name="key" content="value"> to annotations with key->value data selecting the resource (ResourceSelector)
[[elements]]
base = [ "common" ]
path = "//html:head//html:meta"

[[elements.annotationdata]]
key = "{% if ?.@name %}{{ name }}{% endif %}"
value = "{% if ?.@content %}{{ @content }}{% endif %}"
skip_if_missing = true

# By default, ignore any tags in the head (unless they're mentioned specifically later in the config)
[[elements]]
path = "//html:head/html:title"
annotation = "ResourceSelector"

[[elements.annotationdata]]
key = "title"
value = "{{ $. | trim }}"


# Determine how various structural elements are converted to text

[[elements]]
base = [ "common" ]
path = "//html:br"
textsuffix = "\n"

[[elements]]
base = [ "common", "text" ]
path = "//html:p"
textprefix = "\n"
textsuffix = "\n"
annotation = "TextSelector"

# Let's do headers and bulleted lists like markdown
[[elements]]
base = [ "common", "text" ]
path = "//html:h1"
textsuffix = "\n"

[[elements]]
base = [ "common", "text" ]
path = "//html:h2"
textsuffix = "\n"

#Generic, will be overriden by more specific one
[[elements]]
base = [ "common", "text" ]
path = "//html:li"
textprefix = "- "
textsuffix = "\n"

[[elements]]
base = [ "common", "text" ]
path = """//html:body"""
annotation = "TextSelector"
id = "body"

 [[elements.annotationdata]]
 key = "title_from_parent"
 value = "{{ $../html:head/html:title }}"
 skip_if_missing = true

 [[elements.annotationdata]]
 key = "title_from_root"
 value = "{{ $/html:html/html:head/html:title }}"
 skip_if_missing = true

 [[elements.annotationdata]]
 key = "firstfruit"
 value = """{{ $./html:p/html:ul/html:li }}"""
 skip_if_missing = true

 [[elements.annotationdata]]
 key = "fruits"
 value = """{{ $$./html:p/html:ul/html:li }}"""
 skip_if_missing = true

 [[elements.annotationdata]]
 key = "multifruits"
 value = """{{ $$./html:p/html:ul/html:li }}"""
 skip_if_missing = true
 multiple = true

#More specific one takes precendence over the above generic one
[[elements]]
base = [ "common", "text" ]
path = """//html:ul[@class="fruits"]/html:li"""
textprefix = "* "
textsuffix = "\n"

#Not real HTML, test-case modelled after TEI space
[[elements]]
base = [ "common" ]
path = """//html:space[@dim="vertical" and @unit="lines"]"""
text = true
textsuffix = """\n{% for x in @quantity | int | as_range %}\n{% endfor %}"""


[[elements]]
base = [ "common", "text" ]
path = "//html:example"
annotation = "TextSelector"

[[elements.annotationdata]]
key = "requiredattrib"
value = "{{ @requiredattrib }}"

[[elements.annotationdata]]
key = "optattrib"
value = "{{ ?.@optattrib }}"

[[elements]]
base = [ "common","text" ]
path = "//html:marquee"
annotation = "TextSelector"

#map value, some bogus data to test parsing
[[elements.annotationdata]]
key = "map"

[elements.annotationdata.value]
text = "{{ $. }}"
number = 42
bogus = true

[[metadata]]
id = "metadata"

[[metadata.annotationdata]]
key = "author"
value = "proycon"
"#;
3478
/// XHTML fixture for `test_reqattrib`: an `<example>` element that carries `xml:id` and
/// `requiredattrib` but no `optattrib`.
const XMLREQATTRIBEXAMPLE: &str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
<body><example xml:id="ann1" requiredattrib="blah">test</example></body></html>"#;
3481
/// XHTML fixture for `test_reqattrib2`: an `<example>` element that has `xml:id` only, so
/// the `requiredattrib` attribute is absent.
const XMLREQATTRIBEXAMPLE2: &str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
<body><example xml:id="ann1">test</example></body></html>"#;
3484
/// XHTML fixture for `test_reqattrib3`: an `<example>` element that carries both
/// `requiredattrib` and `optattrib`.
const XMLREQATTRIBEXAMPLE3: &str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
<body><example xml:id="ann1" requiredattrib="blah" optattrib="blah">test</example></body></html>"#;
3487
/// XHTML fixture for `test_map`: a `<marquee>` element with `xml:id="ann1"` and the text
/// `test`.
const XMLMAPEXAMPLE: &str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
<body><marquee xml:id="ann1">test</marquee></body></html>"#;
3490
/// A template holding only a plain variable must come out of `precompile` unchanged, and
/// `foo` must not show up among the variables recorded for that template.
#[test]
fn test_precompile_template_nochange() -> Result<(), String> {
    let source = "{{ foo }}";
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, source);
    let recorded = converter.variables.get(source).unwrap();
    assert!(!recorded.contains("foo"));
    Ok(())
}
3502
/// An attribute reference `@foo` must be rewritten to `ATTRIB_foo`, and `@foo` must be
/// recorded among the variables of the template.
#[test]
fn test_precompile_template_attrib() -> Result<(), String> {
    let (source, expected) = ("{{ @foo }}", "{{ ATTRIB_foo }}");
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, expected);
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("@foo"));
    Ok(())
}
3514
/// A namespaced attribute reference `@bar:foo` must be rewritten to `ATTRIB_bar__foo`, and
/// `@bar:foo` must be recorded among the variables of the template.
#[test]
fn test_precompile_template_attrib_ns() -> Result<(), String> {
    let (source, expected) = ("{{ @bar:foo }}", "{{ ATTRIB_bar__foo }}");
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, expected);
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("@bar:foo"));
    Ok(())
}
3526
/// An element reference `$foo` must be rewritten to `ELEMENT_foo`, and `$foo` must be
/// recorded among the variables of the template.
#[test]
fn test_precompile_template_element() -> Result<(), String> {
    let (source, expected) = ("{{ $foo }}", "{{ ELEMENT_foo }}");
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, expected);
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$foo"));
    Ok(())
}
3538
/// A namespaced element reference `$bar:foo` must be rewritten to `ELEMENT_bar__foo`, and
/// `$bar:foo` must be recorded among the variables of the template.
#[test]
fn test_precompile_template_element_ns() -> Result<(), String> {
    let (source, expected) = ("{{ $bar:foo }}", "{{ ELEMENT_bar__foo }}");
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, expected);
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$bar:foo"));
    Ok(())
}
3550
/// The reference `$.` must be rewritten to `ELEMENT_THIS`, and `$.` must be recorded among
/// the variables of the template.
#[test]
fn test_precompile_template_this_text() -> Result<(), String> {
    let (source, expected) = ("{{ $. }}", "{{ ELEMENT_THIS }}");
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, expected);
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$."));
    Ok(())
}
3561
/// The reference `$..` must be rewritten to `ELEMENT_PARENT`, and `$..` must be recorded
/// among the variables of the template.
#[test]
fn test_precompile_template_parent_text() -> Result<(), String> {
    let (source, expected) = ("{{ $.. }}", "{{ ELEMENT_PARENT }}");
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, expected);
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$.."));
    Ok(())
}
3572
/// The multi-element reference `$$foo` must be rewritten to `ELEMENTS_foo`, and `$$foo`
/// must be recorded among the variables of the template.
#[test]
fn test_precompile_template_elements() -> Result<(), String> {
    let (source, expected) = ("{{ $$foo }}", "{{ ELEMENTS_foo }}");
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, expected);
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$$foo"));
    Ok(())
}
3583
/// The namespaced multi-element reference `$$bar:foo` must be rewritten to
/// `ELEMENTS_bar__foo`, and `$$bar:foo` must be recorded among the variables of the template.
#[test]
fn test_precompile_template_elements_ns() -> Result<(), String> {
    let (source, expected) = ("{{ $$bar:foo }}", "{{ ELEMENTS_bar__foo }}");
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, expected);
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$$bar:foo"));
    Ok(())
}
3594
3595
/// An attribute reference inside a `{% for … %}` block tag must be rewritten the same way
/// (`@foo` becomes `ATTRIB_foo`), and `@foo` must be recorded for the template.
#[test]
fn test_precompile_template_attrib2() -> Result<(), String> {
    let (source, expected) = ("{% for x in @foo %}", "{% for x in ATTRIB_foo %}");
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, expected);
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("@foo"));
    Ok(())
}
3607
/// An attribute reference behind the `?.` prefix must keep that prefix (`?.@foo` becomes
/// `?.ATTRIB_foo`), and `@foo` must be recorded for the template.
#[test]
fn test_precompile_template_attrib3() -> Result<(), String> {
    let (source, expected) = ("{{ ?.@foo }}", "{{ ?.ATTRIB_foo }}");
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, expected);
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("@foo"));
    Ok(())
}
3618
/// A path reference `$x/y/z/@a` must be rewritten to `ELEMENT_x_IN_y_IN_z_IN_ATTRIB_a`, and
/// the whole path `$x/y/z/@a` must be recorded for the template.
#[test]
fn test_precompile_template_path() -> Result<(), String> {
    let (source, expected) = ("{{ $x/y/z/@a }}", "{{ ELEMENT_x_IN_y_IN_z_IN_ATTRIB_a }}");
    let cfg = XmlConversionConfig::new();
    let mut converter = XmlToStamConverter::new(&cfg);
    let compiled = converter.precompile(source);
    assert_eq!(compiled, expected);
    let recorded = converter.variables.get(source).unwrap();
    assert!(recorded.contains("$x/y/z/@a"));
    Ok(())
}
3629
/// Parses `CONF`, compiles it in a converter and checks the number of namespaces, elements,
/// base elements and annotationdata entries in the loaded configuration.
#[test]
fn test_loadconfig() -> Result<(), String> {
    let parsed = XmlConversionConfig::from_toml_str(CONF)?;
    let mut converter = XmlToStamConverter::new(&parsed);
    converter.compile().map_err(|err| err.to_string())?;

    let loaded = &converter.config;
    assert_eq!(loaded.namespaces.len(), 4, "number of namespaces");
    assert_eq!(loaded.elements.len(), 15, "number of elements");
    assert_eq!(loaded.baseelements.len(), 2, "number of baseelements");

    let first_element = loaded.elements.first().unwrap();
    assert_eq!(
        first_element.annotationdata.len(),
        7,
        "number of annotationdata under first element"
    );
    let common = loaded.baseelements.get("common").unwrap();
    assert_eq!(
        common.annotationdata.len(),
        7,
        "number of annotationdata under baseelement common"
    );
    Ok(())
}
3642
/// Converts `XMLSMALLEXAMPLE` and checks the resource text, the annotation count, the
/// `emphasis` annotation (text and `style`), the `title` metadata on the resource, and both
/// title lookups stored on the `body` annotation.
#[test]
fn test_small() -> Result<(), String> {
    let debug_config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLSMALLEXAMPLE, &debug_config, &mut store)?;

    let resource = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(resource.text(), "TEST\n\nThis is a test.\n", "resource text");
    assert_eq!(store.annotations_len(), 6, "number of annotations");

    // Inline emphasis: covered text plus the style attribute copied into the data.
    let emphasis = store
        .annotation("emphasis")
        .expect("annotation must have been created at this point");
    assert_eq!(emphasis.text_simple(), Some("test"));
    let style_key = store
        .key("urn:stam-fromhtml", "style")
        .expect("key must exist");
    assert_eq!(
        emphasis.data().filter_key(&style_key).value_as_str(),
        Some("color:green")
    );

    // The document title is looked up through the metadata annotations of the resource.
    let title_key = store
        .key("urn:stam-fromhtml", "title")
        .expect("key must exist");
    let title_annotation = resource
        .annotations_as_metadata()
        .filter_key(&title_key)
        .next()
        .expect("annotation");
    assert_eq!(
        title_annotation.data().filter_key(&title_key).value_as_str(),
        Some("test")
    );

    // The body annotation carries the title resolved via a parent path and via a root path.
    let body = store.annotation("body").expect("body annotation not found");
    let from_parent = store
        .key("urn:stam-fromhtml", "title_from_parent")
        .expect("key must exist");
    let from_root = store
        .key("urn:stam-fromhtml", "title_from_root")
        .expect("key must exist");
    assert_eq!(body.data().filter_key(&from_parent).value_as_str(), Some("test"));
    assert_eq!(body.data().filter_key(&from_root).value_as_str(), Some("test"));
    Ok(())
}
3666
/// Converts `XMLEXAMPLE` with the shared configuration (debug enabled) and checks that the
/// text of the resulting resource equals `XMLEXAMPLE_TEXTOUTPUT`.
#[test]
fn test_full() -> Result<(), String> {
    let debug_config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLEXAMPLE, &debug_config, &mut store)?;
    let resource = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(resource.text(), XMLEXAMPLE_TEXTOUTPUT, "resource text");
    Ok(())
}
3676
/// Converts `XMLEXAMPLE` and checks that the `firstfruit` data on the `body` annotation is
/// the string `apple`.
#[test]
fn test_firstfruit() -> Result<(), String> {
    let debug_config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLEXAMPLE, &debug_config, &mut store)?;
    let body = store.annotation("body").expect("body annotation not found");
    let firstfruit_key = store
        .key("urn:stam-fromhtml", "firstfruit")
        .expect("key must exist");
    let found = body.data().filter_key(&firstfruit_key).value_as_str();
    assert_eq!(found, Some("apple"));
    Ok(())
}
3687
/// Converts `XMLEXAMPLE` and checks that the `fruits` data on the `body` annotation is a
/// single list value holding `apple`, `banana` and `melon`.
#[test]
fn test_fruits() -> Result<(), String> {
    let debug_config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLEXAMPLE, &debug_config, &mut store)?;
    let body = store.annotation("body").expect("body annotation not found");
    let fruits_key = store
        .key("urn:stam-fromhtml", "fruits")
        .expect("key must exist");
    let expected = DataValue::List(vec!["apple".into(), "banana".into(), "melon".into()]);
    assert_eq!(body.data().filter_key(&fruits_key).value(), Some(&expected));
    Ok(())
}
3698
/// Converts `XMLEXAMPLE` and checks that the `multifruits` key on the `body` annotation
/// yields three separate string values: `apple`, `banana` and `melon`, in that order.
#[test]
fn test_multifruits() -> Result<(), String> {
    let debug_config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLEXAMPLE, &debug_config, &mut store)?;
    let body = store.annotation("body").expect("body annotation not found");
    let multifruits_key = store
        .key("urn:stam-fromhtml", "multifruits")
        .expect("key must exist");
    let found: Vec<_> = body.data().filter_key(&multifruits_key).collect();
    assert_eq!(found.len(), 3);
    // The length was pinned to three above, so the zip visits every collected item.
    for (item, fruit) in found.iter().zip(["apple", "banana", "melon"]) {
        assert_eq!(item.value(), &DataValue::String(fruit.to_string()));
    }
    Ok(())
}
3713
/// Converts `XMLTEISPACE` and checks that the resource text consists of exactly four
/// newline characters.
#[test]
fn test_teispace() -> Result<(), String> {
    let parsed = XmlConversionConfig::from_toml_str(CONF)?;
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLTEISPACE, &parsed, &mut store)?;
    let resource = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(resource.text(), "\n\n\n\n", "resource text");
    Ok(())
}
3723
3724
/// Converts `XMLREQATTRIBEXAMPLE` and checks that `requiredattrib` is stored on annotation
/// `ann1`, while no `optattrib` key exists in the store.
#[test]
fn test_reqattrib() -> Result<(), String> {
    let parsed = XmlConversionConfig::from_toml_str(CONF)?;
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLREQATTRIBEXAMPLE, &parsed, &mut store)?;

    let resource = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(resource.text(), "test", "resource text");

    let required_key = store
        .key("urn:stam-fromhtml", "requiredattrib")
        .expect("key must exist");
    let ann1 = store.annotation("ann1").expect("annotation");
    assert_eq!(
        ann1.data().filter_key(&required_key).value_as_str(),
        Some("blah")
    );

    let optional_key = store.key("urn:stam-fromhtml", "optattrib");
    assert!(optional_key.is_none(), "optional attrib is unused");
    Ok(())
}
3738
/// Expects the conversion of `XMLREQATTRIBEXAMPLE2`, whose `<example>` element has no
/// `requiredattrib` attribute, to return an error.
#[test]
fn test_reqattrib2() -> Result<(), String> {
    let debug_config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    let outcome = from_xml_in_memory("test", XMLREQATTRIBEXAMPLE2, &debug_config, &mut store);
    assert!(outcome.is_err(), "checking if error is returned");
    Ok(())
}
3747
/// Converts `XMLREQATTRIBEXAMPLE3` and checks that both `requiredattrib` and `optattrib`
/// are stored with value `blah` on annotation `ann1`.
#[test]
fn test_reqattrib3() -> Result<(), String> {
    let parsed = XmlConversionConfig::from_toml_str(CONF)?;
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLREQATTRIBEXAMPLE3, &parsed, &mut store)?;

    let resource = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(resource.text(), "test", "resource text");

    let required_key = store
        .key("urn:stam-fromhtml", "requiredattrib")
        .expect("key must exist");
    let optional_key = store
        .key("urn:stam-fromhtml", "optattrib")
        .expect("key optattrib must exist");
    let ann1 = store.annotation("ann1").expect("annotation");
    assert_eq!(
        ann1.data().filter_key(&required_key).value_as_str(),
        Some("blah")
    );
    assert_eq!(
        ann1.data().filter_key(&optional_key).value_as_str(),
        Some("blah")
    );
    Ok(())
}
3762
/// Converts `XMLMAPEXAMPLE` and checks that the `map` key on annotation `ann1` holds a map
/// value with exactly three entries: `text` (string `test`), `number` (int 42) and `bogus`
/// (bool true).
#[test]
fn test_map() -> Result<(), String> {
    let config = XmlConversionConfig::from_toml_str(CONF)?;
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLMAPEXAMPLE, &config, &mut store)?;
    let res = store
        .resource("test")
        .expect("resource must have been created at this point");
    assert_eq!(res.text(), "test", "resource text");
    let key = store.key("urn:stam-fromhtml", "map").expect("key must exist");
    let annotation = store.annotation("ann1").expect("annotation");
    let data = annotation
        .data()
        .filter_key(&key)
        .value()
        .expect("data must exist");
    // Fail right away on any other variant rather than asserting on a constant `false`.
    let DataValue::Map(map) = data else {
        panic!("Data is supposed to be a map");
    };
    assert_eq!(map.get("text"), Some(&DataValue::String("test".into())));
    assert_eq!(map.get("number"), Some(&DataValue::Int(42)));
    assert_eq!(map.get("bogus"), Some(&DataValue::Bool(true)));
    assert_eq!(map.len(), 3);
    Ok(())
}
3783
/// Converts `XMLEXAMPLE` and checks that the annotation with id `metadata` carries the
/// `author` key with string value `proycon`.
#[test]
fn test_metadata() -> Result<(), String> {
    let debug_config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLEXAMPLE, &debug_config, &mut store)?;
    let metadata = store.annotation("metadata").expect("annotation");
    let author_key = store
        .key("urn:stam-fromhtml", "author")
        .expect("key must exist");
    let author = metadata
        .data()
        .filter_key(&author_key)
        .value()
        .expect("data must exist");
    assert_eq!(author, &DataValue::String("proycon".into()));
    Ok(())
}
3795
/// Converts `XMLSMALLEXAMPLE` and checks that the `n` key on annotation `p1` holds the
/// integer value 1.
#[test]
fn test_datavalue_int() -> Result<(), String> {
    let debug_config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLSMALLEXAMPLE, &debug_config, &mut store)?;
    let paragraph = store.annotation("p1").expect("annotation not found");
    let n_key = store.key("urn:stam-fromhtml", "n").expect("key must exist");
    let expected = DataValue::Int(1);
    assert_eq!(paragraph.data().filter_key(&n_key).value(), Some(&expected));
    Ok(())
}
3806
/// Converts `XMLSMALLEXAMPLE` and checks that the `nstring` key on annotation `p1` holds
/// the string value `001`.
#[test]
fn test_datavalue_string() -> Result<(), String> {
    let debug_config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
    let mut store = stam::AnnotationStore::new(stam::Config::new());
    from_xml_in_memory("test", XMLSMALLEXAMPLE, &debug_config, &mut store)?;
    let paragraph = store.annotation("p1").expect("annotation not found");
    let nstring_key = store
        .key("urn:stam-fromhtml", "nstring")
        .expect("key must exist");
    let expected = DataValue::String("001".to_string());
    assert_eq!(
        paragraph.data().filter_key(&nstring_key).value(),
        Some(&expected)
    );
    Ok(())
}
3817
3818}