1use std::borrow::Cow;
2use std::collections::{BTreeMap, HashMap, BTreeSet};
3use std::fmt::Display;
4use std::fs::read_to_string;
5use std::path::Path;
6use std::hash::{Hash,DefaultHasher,Hasher};
7use std::process::{Command, Stdio};
8use std::io::{ BufWriter, Write};
9
10use roxmltree::{Document, Node, NodeId, ParsingOptions};
11use serde::Deserialize;
12use stam::*;
13use toml;
14use upon::Engine;
15use std::fmt::Write as FmtWrite;
16use serde_json;
17
/// Namespace URI that is bound to the reserved `xml:` prefix (e.g. `xml:id`, `xml:space`).
18const NS_XML: &str = "http://www.w3.org/XML/1998/namespace";
/// URL of the `anno.jsonld` JSON-LD context document.
/// NOTE(review): the code that consumes this constant is not visible in this part of the file.
19const CONTEXT_ANNO: &str = "http://www.w3.org/ns/anno.jsonld";
20
21
/// Fallback value for the `default_set` configuration field, used both by serde
/// (when the key is absent) and by `XmlConversionConfig::new`.
fn default_set() -> String {
    String::from("urn:stam-fromxml")
}
25
26#[derive(Deserialize)]
27pub struct XmlConversionConfig {
29 #[serde(default)]
30 elements: Vec<XmlElementConfig>,
32
33 #[serde(default)]
34 baseelements: HashMap<String, XmlElementConfig>,
36
37 #[serde(default)]
38 namespaces: HashMap<String, String>,
40
41 #[serde(default = "XmlWhitespaceHandling::collapse")]
42 whitespace: XmlWhitespaceHandling,
44
45 #[serde(default)]
46 context: HashMap<String, toml::Value>,
48
49 #[serde(default)]
50 metadata: Vec<MetadataConfig>,
52
53 #[serde(default)]
54 inject_dtd: Option<String>,
56
57 #[serde(default = "default_set")]
58 default_set: String,
59
60 #[serde(default)]
61 id_prefix: Option<String>,
63
64 #[serde(default)]
65 id_strip_suffix: Vec<String>,
67
68 #[serde(default)]
69 provenance: bool,
71
72 #[serde(default)]
73 external_filters: Vec<ExternalFilter>,
74
75 #[serde(skip_deserializing)]
76 debug: bool,
77
78}
79
80impl XmlConversionConfig {
81 pub fn new() -> Self {
82 Self {
83 elements: Vec::new(),
84 baseelements: HashMap::new(),
85 namespaces: HashMap::new(),
86 context: HashMap::new(),
87 metadata: Vec::new(),
88 whitespace: XmlWhitespaceHandling::Collapse,
89 default_set: default_set(),
90 inject_dtd: None,
91 id_prefix: None,
92 id_strip_suffix: Vec::new(),
93 provenance: false,
94 external_filters: Vec::new(),
95 debug: false,
96 }
97 }
98
99 pub fn resolve_baseelements(&mut self) -> Result<(), XmlConversionError> {
100 let mut replace: Vec<(usize, XmlElementConfig)> = Vec::new();
101 for (i, element) in self.elements.iter().enumerate() {
102 let mut newelement = None;
103 for basename in element.base.iter().rev() {
104 if let Some(baseelement) = self.baseelements.get(basename) {
105 if newelement.is_none() {
106 newelement = Some(element.clone());
107 }
108 newelement
109 .as_mut()
110 .map(|newelement| newelement.update(baseelement));
111 } else {
112 return Err(XmlConversionError::ConfigError(format!(
113 "No such base element: {}",
114 basename
115 )));
116 }
117 }
118 if let Some(newelement) = newelement {
119 replace.push((i, newelement));
120 }
121 }
122 for (i, element) in replace {
123 self.elements[i] = element;
124 }
125 Ok(())
126 }
127
128 pub fn from_toml_str(tomlstr: &str) -> Result<Self, String> {
130 let mut config: Self = toml::from_str(tomlstr).map_err(|e| format!("{}", e))?;
131 config.resolve_baseelements().map_err(|e| format!("{}", e))?;
132 Ok(config)
133 }
134
135 pub fn with_debug(mut self, value: bool) -> Self {
136 self.debug = value;
137 self
138 }
139
140 pub fn with_provenance(mut self, value: bool) -> Self {
142 self.provenance = value;
143 self
144 }
145
146 pub fn with_prefix(mut self, prefix: impl Into<String>, namespace: impl Into<String>) -> Self {
148 self.namespaces.insert(prefix.into(), namespace.into());
149 self
150 }
151
152 pub fn with_id_prefix(mut self, prefix: impl Into<String>) -> Self {
154 self.id_prefix = Some(prefix.into());
155 self
156 }
157
158 pub fn with_id_strip_suffix(mut self, suffix: impl Into<String>) -> Self {
160 self.id_strip_suffix.push(suffix.into());
161 self
162 }
163
164 pub fn with_inject_dtd(mut self, dtd: impl Into<String>) -> Self {
166 self.inject_dtd = Some(dtd.into());
167 self
168 }
169
170 pub fn with_whitespace(mut self, handling: XmlWhitespaceHandling) -> Self {
172 self.whitespace = handling;
173 self
174 }
175
176 pub fn with_element<F>(mut self, expression: &str, setup: F) -> Self
178 where
179 F: Fn(XmlElementConfig) -> XmlElementConfig,
180 {
181 let expression = XPathExpression::new(expression);
182 let element = setup(XmlElementConfig::new(expression));
183 if self.debug {
184 eprintln!("[STAM fromxml] registered {:?}", element);
185 }
186 self.elements.push(element);
187 self
188 }
189
190 fn element_config(&self, node: Node, path: &NodePath) -> Option<&XmlElementConfig> {
192 for elementconfig in self.elements.iter().rev() {
193 if elementconfig.path.test(path, node, self) {
194 return Some(elementconfig);
195 }
196 }
197 None
198 }
199
200 pub fn add_context(&mut self, key: impl Into<String>, value: toml::Value) {
201 self.context.insert(key.into(), value);
202 }
203
204 pub fn debug(&self) -> bool {
205 self.debug
206 }
207}
208
209#[derive(Clone, Copy, Debug, PartialEq, Deserialize)]
210pub enum XmlWhitespaceHandling {
212 Unspecified,
214 Inherit,
216 Preserve,
218 Collapse,
220}
221
222impl Default for XmlWhitespaceHandling {
223 fn default() -> Self {
224 XmlWhitespaceHandling::Unspecified
225 }
226}
227
228impl XmlWhitespaceHandling {
229 fn collapse() -> Self {
230 XmlWhitespaceHandling::Collapse
231 }
232}
233
234#[derive(Debug, Clone, Deserialize, PartialEq, Copy, Default)]
235pub enum XmlAnnotationHandling {
236 #[default]
238 Unspecified,
239
240 None,
242
243 TextSelector,
245
246 ResourceSelector,
248
249 TextSelectorBetweenMarkers,
251}
252
253#[derive(Debug, Clone, Deserialize)]
254pub struct XmlElementConfig {
256 #[serde(default)]
259 path: XPathExpression,
260
261 #[serde(default)]
262 annotation: XmlAnnotationHandling,
263
264 #[serde(default)]
265 annotationdata: Vec<XmlAnnotationDataConfig>,
266
267 #[serde(default)]
269 textprefix: Option<String>,
270
271 #[serde(default)]
273 text: Option<bool>,
274
275 #[serde(default)]
277 textsuffix: Option<String>,
278
279 #[serde(default)]
281 annotatetextprefix: Vec<XmlAnnotationDataConfig>,
282
283 #[serde(default)]
285 annotatetextsuffix: Vec<XmlAnnotationDataConfig>,
286
287 #[serde(default)]
289 include_textprefix: Option<bool>,
290
291 #[serde(default)]
293 include_textsuffix: Option<bool>,
294
295 #[serde(default)]
297 base: Vec<String>,
298
299 #[serde(default)]
301 id: Option<String>,
302
303 #[serde(default)]
304 stop: Option<bool>,
306
307 #[serde(default)]
308 whitespace: XmlWhitespaceHandling,
310
311 #[serde(default)]
312 scope_id: Option<String>,
314
315 #[serde(default)]
316 marker_scope: Option<String>,
319
320}
321
322impl XmlElementConfig {
323 fn new(expression: XPathExpression) -> Self {
324 Self {
325 path: expression,
326 stop: None,
327 whitespace: XmlWhitespaceHandling::Unspecified,
328 annotation: XmlAnnotationHandling::Unspecified,
329 annotationdata: Vec::new(),
330 base: Vec::new(),
331 id: None,
332 textprefix: None,
333 text: None,
334 textsuffix: None,
335 annotatetextprefix: Vec::new(),
336 annotatetextsuffix: Vec::new(),
337 include_textprefix: None,
338 include_textsuffix: None,
339 scope_id: None,
340 marker_scope: None,
341 }
342 }
343
344 pub fn update(&mut self, base: &XmlElementConfig) {
345 if self.whitespace == XmlWhitespaceHandling::Unspecified
346 && base.whitespace != XmlWhitespaceHandling::Unspecified
347 {
348 self.whitespace = base.whitespace;
349 }
350 if self.annotation == XmlAnnotationHandling::Unspecified
351 && base.annotation != XmlAnnotationHandling::Unspecified
352 {
353 self.annotation = base.annotation;
354 }
355 if self.textprefix.is_none() && base.textprefix.is_some() {
356 self.textprefix = base.textprefix.clone();
357 }
358 if self.text.is_none() && base.text.is_some() {
359 self.text = base.text;
360 }
361 if self.textsuffix.is_none() && base.textsuffix.is_some() {
362 self.textsuffix = base.textsuffix.clone();
363 }
364 if self.id.is_none() && base.id.is_some() {
365 self.id = base.id.clone();
366 }
367 if self.stop.is_none() && base.stop.is_some() {
368 self.stop = base.stop;
369 }
370 for annotationdata in base.annotationdata.iter() {
371 if !self.annotationdata.contains(annotationdata) {
372 self.annotationdata.push(annotationdata.clone());
373 }
374 }
375 if self.annotatetextsuffix.is_empty() && !base.annotatetextsuffix.is_empty() {
376 self.annotatetextsuffix = base.annotatetextsuffix.clone();
377 }
378 if self.annotatetextprefix.is_empty() && !base.annotatetextprefix.is_empty() {
379 self.annotatetextprefix = base.annotatetextprefix.clone();
380 }
381 if self.include_textsuffix.is_none() {
382 self.include_textsuffix = base.include_textsuffix;
383 }
384 if self.include_textprefix.is_none() {
385 self.include_textprefix = base.include_textprefix;
386 }
387 }
388
389
390 pub fn with_stop(mut self, stop: bool) -> Self {
392 self.stop = Some(stop);
393 self
394 }
395
396 pub fn with_whitespace(mut self, handling: XmlWhitespaceHandling) -> Self {
398 self.whitespace = handling;
399 self
400 }
401
402 pub fn with_text(mut self, text: bool) -> Self {
403 self.text = Some(text);
404 self
405 }
406
407 pub fn with_base(mut self, iter: impl Iterator<Item = impl Into<String>>) -> Self {
408 self.base = iter.into_iter().map(|s| s.into()).collect();
409 self
410 }
411
412 pub fn without_text(mut self) -> Self {
413 self.text = None;
414 self
415 }
416
417 pub fn with_annotation(mut self, annotation: XmlAnnotationHandling) -> Self {
418 self.annotation = annotation;
419 self
420 }
421
422 fn hash(&self) -> usize {
424 self.path.0.as_ptr() as usize
425 }
426}
427
428impl PartialEq for XmlElementConfig {
429 fn eq(&self, other: &Self) -> bool {
430 self.hash() == other.hash()
431 }
432}
433
434#[derive(Debug, Clone, Deserialize, PartialEq)]
435pub struct XmlAnnotationDataConfig {
436 id: Option<String>,
438 set: Option<String>,
440 key: Option<String>,
442 value: Option<toml::Value>,
444
445 #[serde(default)]
447 valuetype: Option<String>,
448
449 #[serde(default)]
451 allow_empty_value: bool,
452
453 #[serde(default)]
455 skip_if_missing: bool,
456
457
458 #[serde(default)]
460 multiple: bool,
461}
462
463impl XmlAnnotationDataConfig {
464 pub fn with_id(mut self, id: impl Into<String>) -> Self {
465 self.id = Some(id.into());
466 self
467 }
468
469 pub fn with_set(mut self, set: impl Into<String>) -> Self {
470 self.set = Some(set.into());
471 self
472 }
473
474 pub fn with_key(mut self, key: impl Into<String>) -> Self {
475 self.key = Some(key.into());
476 self
477 }
478
479 pub fn with_value(mut self, value: impl Into<toml::Value>) -> Self {
480 self.value = Some(value.into());
481 self
482 }
483}
484
485#[derive(Debug, Clone, PartialEq, Deserialize)]
487struct XPathExpression(String);
488
489impl XPathExpression {
490 pub fn new(expression: impl Into<String>) -> Self {
491 Self(expression.into())
492 }
493
494 pub fn any() -> Self {
495 Self("*".into())
496 }
497
498 pub fn iter<'a>(
499 &'a self,
500 config: &'a XmlConversionConfig,
501 ) -> impl Iterator<Item = (Option<&'a str>, &'a str, Option<&'a str>)> {
502 self.0.trim_start_matches('/').split("/").map(|segment| {
503 let (prefix, name, condition) = Self::parse_segment(segment);
505 let namespace = if let Some(prefix) = prefix {
506 if let Some(namespace) = config.namespaces.get(prefix).map(|x| x.as_str()) {
507 Some(namespace)
508 } else {
509 panic!(
510 "XML namespace prefix not known in configuration: {}",
511 prefix
512 );
513 }
514 } else {
515 None
516 };
517 (namespace, name, condition)
518 })
519 }
520
521 fn test<'a, 'b>(&self, path: &NodePath<'a, 'b>, mut node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
523 let mut pathiter = path.components.iter().rev();
524 for (refns, refname, condition) in self.iter(config).collect::<Vec<_>>().into_iter().rev() {
525 if let Some(component) = pathiter.next() {
526 if refname != "*" && refname != "" {
530 if refns.is_none() != component.namespace.is_none() || component.namespace != refns || refname != component.tagname {
531 return false;
532 }
533 }
534 if let Some(condition) = condition {
535 if !self.test_condition(condition, node, config) {
536 return false;
537 }
538 }
539 if let Some(parent) = node.parent() {
540 node = parent;
541 }
542 } else {
543 if refname != "" {
544 return false;
545 }
546 }
547 }
548 true
552 }
553
554 fn test_condition<'a,'b>(&self, condition: &'a str, node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
555 for condition in condition.split(" and ") { if let Some(pos) = condition.find("!=") {
557 let var = &condition[..pos];
558 let right = condition[pos+2..].trim_matches('"');
559 if self.get_var(var, &node, config) == Some(right) {
560 return false;
561 }
562 } else if let Some(pos) = condition.find("=") {
563 let var = &condition[..pos];
564 let right = condition[pos+1..].trim_matches('"');
565 let value = self.get_var(var, &node, config);
566 if value != Some(right) {
567 return false;
568 }
569 } else {
570 let v = self.get_var(condition, &node, config);
572 if v.is_none() || v == Some("") {
573 return false;
574 }
575 }
576 }
577 true
581 }
582
583 fn get_var<'a,'b>(&self, var: &str, node: &Node<'a,'b>, config: &XmlConversionConfig) -> Option<&'a str> {
585 if var.starts_with("@") {
586 if let Some(pos) = var.find(":") {
587 let prefix = &var[1..pos];
588 if let Some(ns) = config.namespaces.get(prefix) {
589 let var = &var[pos+1..];
590 node.attribute((ns.as_str(),var))
591 } else {
592 None
593 }
594 } else {
595 node.attribute(&var[1..])
596 }
597 } else if var == "text()" {
598 node.text().map(|s|s.trim())
599 } else {
600 None
601 }
602 }
603
604 fn parse_segment<'a>(s: &'a str) -> (Option<&'a str>, &'a str, Option<&'a str>) {
606 let (name, condition) = if let (Some(begin), Some(end)) = (s.find("["), s.rfind("]")) {
607 (&s[..begin], Some(&s[begin + 1..end]))
608 } else {
609 (s, None)
610 };
611 if let Some((prefix, name)) = name.split_once(":") {
612 (Some(prefix), name, condition)
613 } else {
614 (None, name, condition)
615 }
616 }
617}
618
619
620
621impl Default for XPathExpression {
622 fn default() -> Self {
623 Self::any()
624 }
625}
626
/// One step of a `NodePath`: an element identified by namespace and tag name.
#[derive(Clone, Debug, PartialEq)]
struct NodePathComponent<'a,'b> {
    /// Namespace URI of the element, if it has one.
    namespace: Option<&'a str>,
    /// Local tag name of the element.
    tagname: &'b str,
    /// Optional index, rendered as `[index]` after the tag name.
    /// NOTE(review): presumably the position among equally named siblings — confirm at the call sites.
    index: Option<usize>,
}
634
635#[derive(Clone, Debug, PartialEq, Default)]
636struct NodePath<'a, 'b> {
637 components: Vec<NodePathComponent<'a,'b>>,
638}
639
640impl<'a, 'b> Display for NodePath<'a, 'b> {
641 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
642 for component in self.components.iter() {
643 write!(f, "/")?;
644 if let Some(ns) = component.namespace {
645 if let Some(index) = component.index {
646 write!(f, "{{{}}}{}[{}]", ns, component.tagname, index)?;
647 } else {
648 write!(f, "{{{}}}{}", ns, component.tagname)?;
649 }
650 } else {
651 if let Some(index) = component.index {
652 write!(f, "{}[{}]", component.tagname, index)?;
653 } else {
654 write!(f, "{}", component.tagname)?;
655 }
656 }
657 }
658 Ok(())
659 }
660}
661
662impl<'a,'b> NodePath<'a,'b> {
663 fn add(&mut self, node: &Node<'a,'b>, index: Option<usize>) {
664 if node.tag_name().name() != "" {
665 self.components.push(
666 NodePathComponent {
667 namespace: node.tag_name().namespace(),
668 tagname: node.tag_name().name(),
669 index,
670 }
671 )
672 }
673 }
674
675 fn format_as_xpath(&self, prefixes: &HashMap<String, String>) -> String {
676 let mut out = String::new();
677 for component in self.components.iter() {
678 out.push('/');
679 if let Some(ns) = component.namespace {
680 if let Some(prefix) = prefixes.get(ns) {
681 if let Some(index) = component.index {
682 out += &format!("{}:{}[{}]", prefix, component.tagname, index);
683 } else {
684 out += &format!("{}:{}", prefix, component.tagname);
685 }
686 } else {
687 eprintln!("STAM fromxml WARNING: format_as_xpath: namespace {} not defined, no prefix found!", ns);
688 if let Some(index) = component.index {
689 out += &format!("{}[{}]", component.tagname, index);
690 } else {
691 out += &format!("{}", component.tagname);
692 }
693 }
694 } else {
695 if let Some(index) = component.index {
696 out += &format!("{}[{}]", component.tagname, index);
697 } else {
698 out += &format!("{}", component.tagname);
699 }
700 }
701 }
702 out
703 }
704}
705
706
707#[derive(Default,Debug)]
709struct SiblingCounter {
710 map: HashMap<String,usize>,
711}
712
713impl SiblingCounter {
714 fn count<'a,'b>(&mut self, node: &Node<'a,'b>) -> usize {
715 let s = format!("{:?}", node.tag_name());
716 *self.map.entry(s).and_modify(|c| {*c += 1;}).or_insert(1)
717 }
718}
719
720
721#[derive(Debug, Clone, Deserialize)]
722pub struct MetadataConfig {
724 #[serde(default)]
726 annotation: XmlAnnotationHandling,
727
728 #[serde(default)]
729 annotationdata: Vec<XmlAnnotationDataConfig>,
730
731 #[serde(default)]
733 id: Option<String>,
734}
735
736pub fn from_xml<'a>(
738 filename: &Path,
739 config: &XmlConversionConfig,
740 store: &'a mut AnnotationStore,
741) -> Result<(), String> {
742 if config.debug {
743 eprintln!("[STAM fromxml] parsing {}", filename.display());
744 }
745
746 let mut xmlstring = read_to_string(filename)
748 .map_err(|e| format!("Error opening XML file {}: {}", filename.display(), e))?;
749
750 if xmlstring[..100].find("<!DOCTYPE html>").is_some() && config.inject_dtd.is_some() {
752 xmlstring = xmlstring.replacen("<!DOCTYPE html>", "", 1);
753 }
754
755 if xmlstring[..100].find("<!DOCTYPE").is_none() {
757 if let Some(dtd) = config.inject_dtd.as_ref() {
758 xmlstring = dtd.to_string() + &xmlstring
759 };
760 } else if config.inject_dtd.is_some() {
761 eprintln!("[STAM fromxml] WARNING: Can not inject DTD because file already has a DOCTYPE");
762 }
763
764 let doc = Document::parse_with_options(
766 &xmlstring,
767 ParsingOptions {
768 allow_dtd: true,
769 ..ParsingOptions::default()
770 },
771 )
772 .map_err(|e| format!("Error parsing XML file {}: {}", filename.display(), e))?;
773
774 let mut converter = XmlToStamConverter::new(config);
775 converter
776 .compile()
777 .map_err(|e| format!("Error compiling templates: {}", e))?;
778
779 let textoutfilename = format!(
780 "{}.txt",
781 filename
782 .file_stem()
783 .expect("invalid filename")
784 .to_str()
785 .expect("invalid utf-8 in filename")
786 );
787
788 let mut path = NodePath::default();
790 path.add(&doc.root_element(), None);
791 converter
792 .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(textoutfilename.as_str()), Some(&filename.to_string_lossy()), 0)
793 .map_err(|e| {
794 format!(
795 "Error extracting element text from {}: {}",
796 filename.display(),
797 e
798 )
799 })?;
800 if config.debug {
801 eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
802 }
803 let resource = TextResourceBuilder::new()
804 .with_id(filename_to_id(textoutfilename.as_str(), config).to_string())
805 .with_text(converter.text.clone())
806 .with_filename(&textoutfilename);
807
808 converter.resource_handle = Some(
809 store
810 .add_resource(resource)
811 .map_err(|e| format!("Failed to add resource {}: {}", &textoutfilename, e))?,
812 );
813
814 converter.add_metadata(store).map_err(|e| format!("Failed to add metadata {}: {}", &textoutfilename, e))?;
815
816 converter
818 .extract_element_annotation(doc.root_element(), &path, Some(&filename.to_string_lossy()),0, store)
819 .map_err(|e| {
820 format!(
821 "Error extracting element annotation from {}: {}",
822 filename.display(),
823 e
824 )
825 })?;
826
827 Ok(())
828}
829
830pub fn from_multi_xml<'a>(
832 filenames: &Vec<&Path>,
833 outputfile: Option<&Path>,
834 config: &XmlConversionConfig,
835 store: &'a mut AnnotationStore,
836) -> Result<(), String> {
837
838 let textoutfilename = if let Some(outputfile) = outputfile {
839 format!("{}",outputfile.to_str().expect("invalid utf-8 in filename"))
840 } else {
841 format!(
842 "{}.txt",
843 filenames.iter().next().expect("1 or more filename need to be provided")
844 .file_stem()
845 .expect("invalid filename")
846 .to_str()
847 .expect("invalid utf-8 in filename")
848 )
849 };
850
851 let mut xmlstrings: Vec<String> = Vec::new();
853 let mut docs: Vec<Document> = Vec::new();
854 for filename in filenames.iter() {
855 if config.debug {
856 eprintln!("[STAM fromxml] parsing {} (one of multiple)", filename.display());
857 }
858 let mut xmlstring = read_to_string(filename).map_err(|e| format!("Error opening XML file {}: {}", filename.display(), e))?;
860 if xmlstring[..100].find("<!DOCTYPE html>").is_some() && config.inject_dtd.is_some() {
861 xmlstring = xmlstring.replacen("<!DOCTYPE html>", "", 1);
862 }
863 if xmlstring[..100].find("<!DOCTYPE").is_none() {
865 if let Some(dtd) = config.inject_dtd.as_ref() {
866 xmlstring = dtd.to_string() + &xmlstring
867 };
868 } else if config.inject_dtd.is_some() {
869 eprintln!("[STAM fromxml] WARNING: Can not inject DTD because file already has a DOCTYPE");
870 }
871 xmlstrings.push(xmlstring);
872 }
873
874 for (filename, xmlstring) in filenames.iter().zip(xmlstrings.iter()) {
875 let doc = Document::parse_with_options(
877 xmlstring,
878 ParsingOptions {
879 allow_dtd: true,
880 ..ParsingOptions::default()
881 },
882 )
883 .map_err(|e| format!("Error parsing XML file {}: {}", filename.display(), e))?;
884 docs.push(doc);
885 }
886
887 let mut converter = XmlToStamConverter::new(config);
888 converter
889 .compile()
890 .map_err(|e| format!("Error compiling templates: {}", e))?;
891
892 for (i, (doc, filename)) in docs.iter().zip(filenames.iter()).enumerate() {
893 let mut path = NodePath::default();
894 path.add(&doc.root_element(), None);
895 converter
897 .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(textoutfilename.as_str()), Some(&filename.to_string_lossy()), i)
898 .map_err(|e| {
899 format!(
900 "Error extracting element text from {}: {}",
901 filename.display(),
902 e
903 )
904 })?;
905 if config.debug {
906 eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
907 }
908 }
909
910 let resource = TextResourceBuilder::new()
911 .with_id(filename_to_id(textoutfilename.as_str(), config).to_string())
912 .with_text(converter.text.clone())
913 .with_filename(&textoutfilename);
914
915 converter.resource_handle = Some(
916 store
917 .add_resource(resource)
918 .map_err(|e| format!("Failed to add resource {}: {}", &textoutfilename, e))?,
919 );
920
921 converter.add_metadata(store).map_err(|e| format!("Failed to add metadata {}: {}", &textoutfilename, e))?;
922
923 for (i,(doc, filename)) in docs.iter().zip(filenames.iter()).enumerate() {
925 let mut path = NodePath::default();
926 path.add(&doc.root_element(), None);
927 converter
928 .extract_element_annotation(doc.root_element(), &path, Some(&filename.to_string_lossy()),i, store)
929 .map_err(|e| {
930 format!(
931 "Error extracting element annotation from {}: {}",
932 filename.display(),
933 e
934 )
935 })?;
936 }
937
938 Ok(())
939}
940
941pub fn from_xml_in_memory<'a>(
943 resource_id: &str,
944 xmlstring: &str,
945 config: &XmlConversionConfig,
946 store: &'a mut AnnotationStore,
947) -> Result<(), String> {
948 if config.debug {
949 eprintln!("[STAM fromxml] parsing XML string");
950 }
951
952 let doc = Document::parse_with_options(
954 &xmlstring,
955 ParsingOptions {
956 allow_dtd: true,
957 ..ParsingOptions::default()
958 },
959 )
960 .map_err(|e| format!("Error parsing XML string: {}", e))?;
961
962 let mut converter = XmlToStamConverter::new(config);
963 converter
964 .compile()
965 .map_err(|e| format!("Error compiling templates: {}", e))?;
966
967 let mut path = NodePath::default();
968 path.add(&doc.root_element(), None);
969 converter
971 .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(resource_id), Some(resource_id), 0)
972 .map_err(|e| {
973 format!(
974 "Error extracting element text from {}: {}",
975 resource_id,
976 e
977 )
978 })?;
979 if config.debug {
980 eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
981 }
982 let resource = TextResourceBuilder::new()
983 .with_id(resource_id)
984 .with_text(converter.text.clone());
985
986 converter.resource_handle = Some(
987 store
988 .add_resource(resource)
989 .map_err(|e| format!("Failed to add resource {}: {}", &resource_id, e))?,
990 );
991
992 converter.add_metadata(store).map_err(|e| format!("Failed to add metadata for {}: {}", &resource_id, e))?;
993
994 converter
996 .extract_element_annotation(doc.root_element(), &path, Some(resource_id), 0, store)
997 .map_err(|e| {
998 format!(
999 "Error extracting element annotation from {}: {}",
1000 resource_id,
1001 e
1002 )
1003 })?;
1004
1005 Ok(())
1006}
1007
1008pub fn filename_to_id<'a>(filename: &'a str, config: &XmlConversionConfig) -> &'a str {
1009 for suffix in config.id_strip_suffix.iter() {
1010 if filename.ends_with(suffix) {
1011 return &filename[..filename.len() - suffix.len()];
1012 }
1013 }
1014 return filename;
1015}
1016
/// Distinguishes the text spans that are recorded per XML node; used as part of
/// the keys of the converter's position maps.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum PositionType {
    /// The text of the element itself.
    /// NOTE(review): whether this span includes prefix/suffix text is decided elsewhere.
    Body,
    /// The text emitted for the element's `textprefix`.
    TextPrefix,
    /// The text emitted for the element's `textsuffix`.
    TextSuffix,
}
1023
1024struct XmlToStamConverter<'a> {
1025 cursor: usize,
1027
1028 text: String,
1030
1031 template_engine: Engine<'a>,
1033
1034 positionmap: HashMap<(usize,NodeId,PositionType), Offset>,
1036
1037 bytepositionmap: HashMap<(usize,NodeId,PositionType), (usize, usize)>,
1039
1040 markers: HashMap<usize, Vec<(usize,NodeId)>>,
1042
1043 scopes: HashMap<String, (usize,NodeId)>,
1045
1046 resource_handle: Option<TextResourceHandle>,
1048
1049 pending_whitespace: bool,
1051
1052 config: &'a XmlConversionConfig,
1054
1055 prefixes: HashMap<String, String>,
1057
1058 global_context: BTreeMap<String, upon::Value>,
1060
1061 variables: BTreeMap<String, BTreeSet<&'a str>>,
1063
1064 debugindent: String,
1065}
1066
1067pub enum XmlConversionError {
1068 StamError(StamError),
1069 TemplateError(String, Option<upon::Error>),
1070 ConfigError(String),
1071}
1072
1073impl From<StamError> for XmlConversionError {
1074 fn from(error: StamError) -> Self {
1075 Self::StamError(error)
1076 }
1077}
1078
1079impl From<upon::Error> for XmlConversionError {
1080 fn from(error: upon::Error) -> Self {
1081 Self::TemplateError("".into(), Some(error))
1082 }
1083}
1084
1085impl Display for XmlConversionError {
1086 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1087 match self {
1088 Self::StamError(e) => e.fmt(f),
1089 Self::TemplateError(s, e) => {
1090 f.write_str(s.as_str())?;
1091 f.write_str(": ")?;
1092 if let Some(e) = e {
1093 e.fmt(f)?;
1094 }
1095 f.write_str("")
1096 }
1097 Self::ConfigError(e) => e.fmt(f),
1098 }
1099 }
1100}
1101
1102impl<'a> XmlToStamConverter<'a> {
1103 fn new(config: &'a XmlConversionConfig) -> Self {
1104 let mut prefixes: HashMap<String, String> = HashMap::new();
1105 for (prefix, namespace) in config.namespaces.iter() {
1106 prefixes.insert(namespace.to_string(), prefix.to_string());
1107 }
1108 let mut template_engine = Engine::new();
1109 template_engine.set_default_formatter(&value_formatter); template_engine.add_function("capitalize", filter_capitalize);
1111 template_engine.add_function("lower", str::to_lowercase);
1112 template_engine.add_function("upper", str::to_uppercase);
1113 template_engine.add_function("trim", |s: &str| s.trim().to_string() );
1114 template_engine.add_function("add", filter_add);
1115 template_engine.add_function("sub", filter_sub);
1116 template_engine.add_function("mul", filter_mul);
1117 template_engine.add_function("div", filter_div);
1118 template_engine.add_function("eq", |a: &upon::Value, b: &upon::Value| a == b);
1119 template_engine.add_function("ne", |a: &upon::Value, b: &upon::Value| a != b);
1120 template_engine.add_function("gt", filter_gt);
1121 template_engine.add_function("lt", filter_lt);
1122 template_engine.add_function("gte", filter_gte);
1123 template_engine.add_function("lte", filter_lte);
1124 template_engine.add_function("int", |a: &upon::Value| match a {
1125 upon::Value::Integer(x) => upon::Value::Integer(*x),
1126 upon::Value::Float(x) => upon::Value::Integer(*x as i64),
1127 upon::Value::String(s) => upon::Value::Integer(s.parse().expect("int filter expects an integer value")),
1128 _ => panic!("int filter expects an integer value"), });
1130 template_engine.add_function("float", |a: &upon::Value| match a {
1131 upon::Value::Float(_) => a.clone(),
1132 upon::Value::Integer(x) => upon::Value::Float(*x as f64),
1133 upon::Value::String(s) => upon::Value::Float(s.parse().expect("float filter expects a float value")),
1134 _ => panic!("int filter expects an integer value"), });
1136 template_engine.add_function("str", |a: upon::Value| match a {
1137 upon::Value::Integer(x) => upon::Value::String(format!("{}",x)),
1138 upon::Value::Float(x) => upon::Value::String(format!("{}",x)),
1139 upon::Value::Bool(x) => upon::Value::String(format!("{}",x)),
1140 upon::Value::String(_) => a,
1141 upon::Value::None => upon::Value::String(String::new()),
1142 upon::Value::List(list) => { let newlist: Vec<String> = list.iter().map(|v| match v {
1144 upon::Value::String(s) => s.clone(),
1145 upon::Value::Integer(d) => format!("{}",d),
1146 upon::Value::Float(d) => format!("{}",d),
1147 upon::Value::Bool(d) => format!("{}",d),
1148 _ => String::new(),
1149 }).collect();
1150 upon::Value::String(newlist.join(", "))
1151 },
1152 _ => panic!("map to string not implemented"), });
1154 template_engine.add_function("as_range", |a: i64| upon::Value::List(std::ops::Range { start: 0, end: a }.into_iter().map(|x| upon::Value::Integer(x+1)).collect::<Vec<_>>()) );
1155 template_engine.add_function("last", |list: &[upon::Value]| list.last().map(Clone::clone));
1156 template_engine.add_function("first", |list: &[upon::Value]| {
1157 list.first().map(Clone::clone)
1158 });
1159 template_engine.add_function("tokenize", |s: &str| {
1160 upon::Value::List(
1161 s.split(|c| c == ' ' || c == '\n').filter_map(|x|
1162 if !x.is_empty() {
1163 Some(upon::Value::String(x.to_string()))
1164 } else {
1165 None
1166 }
1167 )
1168 .collect::<Vec<upon::Value>>())
1169 });
1170 template_engine.add_function("replace", |s: &str, from: &str, to: &str| {
1171 upon::Value::String(s.replace(from,to))
1172 });
1173 template_engine.add_function("starts_with", |s: &str, prefix: &str| {
1174 s.starts_with(prefix)
1175 });
1176 template_engine.add_function("ends_with", |s: &str, suffix: &str| {
1177 s.ends_with(suffix)
1178 });
1179 template_engine.add_function("basename", |a: &upon::Value| match a {
1180 upon::Value::String(s) => upon::Value::String(s.split(|c| c == '/' || c == '\\').last().expect("splitting must work").to_string()),
1181 _ => panic!("basename filter expects a string value"), });
1183 template_engine.add_function("noext", |a: &upon::Value| match a {
1184 upon::Value::String(s) => if let Some(pos) = s.rfind('.') {
1185 s[..pos].to_string()
1186 } else {
1187 s.to_string()
1188 },
1189 _ => panic!("basename filter expects a string value"), });
1191 template_engine.add_function("join", |list: &upon::Value, delimiter: &str| match list {
1192 upon::Value::List(list) => { let newlist: Vec<String> = list.iter().map(|v| match v {
1194 upon::Value::String(s) => s.clone(),
1195 upon::Value::Integer(d) => format!("{}",d),
1196 upon::Value::Float(d) => format!("{}",d),
1197 upon::Value::Bool(d) => format!("{}",d),
1198 _ => String::new(),
1199 }).collect();
1200 upon::Value::String(newlist.join(delimiter))
1201 },
1202 _ => {
1203 list.clone() }
1205 });
1206 let mut converter = Self {
1207 cursor: 0,
1208 text: String::new(),
1209 template_engine,
1210 positionmap: HashMap::new(),
1211 bytepositionmap: HashMap::new(),
1212 scopes: HashMap::new(),
1213 markers: HashMap::new(),
1214 resource_handle: None,
1215 pending_whitespace: false,
1216 global_context: BTreeMap::new(),
1217 debugindent: String::new(),
1218 variables: BTreeMap::new(),
1219 prefixes,
1220 config,
1221 };
1222 converter.set_global_context();
1223 converter.add_external_filters();
1224 converter
1225 }
1226
1227 fn add_external_filters(&mut self) {
1228 for filter in self.config.external_filters.clone() {
1229 self.template_engine.add_function(filter.name.clone(), move |value: &upon::Value| filter.run(value) );
1230 }
1231 }
1232
1233 fn compile(&mut self) -> Result<(), XmlConversionError> {
1235 if self.config.debug {
1236 eprintln!("[STAM fromxml] compiling templates");
1237 }
1238 for element in self.config.elements.iter() {
1239 if let Some(textprefix) = element.textprefix.as_ref() {
1240 if self.template_engine.get_template(textprefix.as_str()).is_none() {
1241 let template = self.precompile(textprefix.as_str());
1242 self.template_engine
1243 .add_template(textprefix.clone(), template)
1244 .map_err(|e| {
1245 XmlConversionError::TemplateError(
1246 format!("element/textprefix template {}", textprefix.clone()),
1247 Some(e),
1248 )
1249 })?;
1250 }
1251 }
1252 if let Some(textsuffix) = element.textsuffix.as_ref() {
1253 if self.template_engine.get_template(textsuffix.as_str()).is_none() {
1254 let template = self.precompile(textsuffix.as_str());
1255 self.template_engine
1256 .add_template(textsuffix.clone(), template)
1257 .map_err(|e| {
1258 XmlConversionError::TemplateError(
1259 format!("element/textsuffix template {}", textsuffix.clone()),
1260 Some(e),
1261 )
1262 })?;
1263 }
1264 }
1265 if let Some(id) = element.id.as_ref() {
1266 if self.template_engine.get_template(id.as_str()).is_none() {
1267 let template = self.precompile(id.as_str());
1268 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1269 XmlConversionError::TemplateError(
1270 format!("element/id template {}", id.clone()),
1271 Some(e),
1272 )
1273 })?;
1274 }
1275 }
1276 for annotationdata in element.annotationdata.iter().chain(element.annotatetextprefix.iter()).chain(element.annotatetextsuffix.iter()) {
1277 if let Some(id) = annotationdata.id.as_ref() {
1278 if self.template_engine.get_template(id.as_str()).is_none() {
1279 let template = self.precompile(id.as_str());
1280 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1281 XmlConversionError::TemplateError(
1282 format!("annotationdata/id template {}", id.clone()),
1283 Some(e),
1284 )
1285 })?;
1286 }
1287 }
1288 if let Some(set) = annotationdata.set.as_ref() {
1289 if self.template_engine.get_template(set.as_str()).is_none() {
1290 let template = self.precompile(set.as_str());
1291 self.template_engine.add_template(set.clone(), template).map_err(|e| {
1293 XmlConversionError::TemplateError(
1294 format!("annotationdata/set template {}", set.clone()),
1295 Some(e),
1296 )
1297 })?;
1298 }
1299 }
1300 if let Some(key) = annotationdata.key.as_ref() {
1301 if self.template_engine.get_template(key.as_str()).is_none() {
1302 let template = self.precompile(key.as_str());
1303 self.template_engine.add_template(key.clone(), template).map_err(|e| {
1304 XmlConversionError::TemplateError(
1305 format!("annotationdata/key template {}", key.clone()),
1306 Some(e),
1307 )
1308 })?;
1309 }
1310 }
1311 if let Some(value) = annotationdata.value.as_ref() {
1312 self.compile_value(value)?;
1313 }
1314 }
1315 }
1316 for metadata in self.config.metadata.iter() {
1317 if let Some(id) = metadata.id.as_ref() {
1318 if self.template_engine.get_template(id.as_str()).is_none() {
1319 let template = self.precompile(id.as_str());
1320 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1321 XmlConversionError::TemplateError(
1322 format!("metadata/id template {}", id.clone()),
1323 Some(e),
1324 )
1325 })?;
1326 }
1327 }
1328 for annotationdata in metadata.annotationdata.iter() {
1329 if let Some(id) = annotationdata.id.as_ref() {
1330 if self.template_engine.get_template(id.as_str()).is_none() {
1331 let template = self.precompile(id.as_str());
1332 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1333 XmlConversionError::TemplateError(
1334 format!("annotationdata/id template {}", id.clone()),
1335 Some(e),
1336 )
1337 })?;
1338 }
1339 }
1340 if let Some(set) = annotationdata.set.as_ref() {
1341 if self.template_engine.get_template(set.as_str()).is_none() {
1342 let template = self.precompile(set.as_str());
1343 self.template_engine.add_template(set.clone(), template).map_err(|e| {
1345 XmlConversionError::TemplateError(
1346 format!("annotationdata/set template {}", set.clone()),
1347 Some(e),
1348 )
1349 })?;
1350 }
1351 }
1352 if let Some(key) = annotationdata.key.as_ref() {
1353 if self.template_engine.get_template(key.as_str()).is_none() {
1354 let template = self.precompile(key.as_str());
1355 self.template_engine.add_template(key.clone(), template).map_err(|e| {
1356 XmlConversionError::TemplateError(
1357 format!("annotationdata/key template {}", key.clone()),
1358 Some(e),
1359 )
1360 })?;
1361 }
1362 }
1363 if let Some(value) = annotationdata.value.as_ref() {
1364 self.compile_value(value)?;
1365 }
1366 }
1367 }
1368 Ok(())
1369 }
1370
1371 fn compile_value(&mut self, value: &'a toml::Value) -> Result<(), XmlConversionError> {
1373 match value {
1374 toml::Value::String(value) => {
1375 if self.template_engine.get_template(value.as_str()).is_none() {
1376 let template = self.precompile(value.as_str());
1377 self.template_engine.add_template(value.clone(), template).map_err(|e| {
1378 XmlConversionError::TemplateError(
1379 format!("annotationdata/value template {}", value.clone()),
1380 Some(e),
1381 )
1382 })?;
1383 }
1384 }
1385 toml::Value::Table(map) => {
1386 for (_key, value) in map.iter() {
1387 self.compile_value(value)?;
1388 }
1389 },
1390 toml::Value::Array(list) => {
1391 for value in list.iter() {
1392 self.compile_value(value)?;
1393 }
1394 }
1395 _ => {} }
1397 Ok(())
1398 }
1399
1400 fn extract_element_text<'b>(
1405 &mut self,
1406 node: Node<'a,'b>,
1407 path: &NodePath<'a,'b>,
1408 whitespace: XmlWhitespaceHandling,
1409 resource_id: Option<&str>,
1410 inputfile: Option<&str>,
1411 doc_num: usize,
1412 ) -> Result<(), XmlConversionError> {
1413 if self.config.debug {
1414 eprintln!("[STAM fromxml]{} extracting text for element {}", self.debugindent, path);
1415 }
1416 let mut begin = self.cursor; let mut bytebegin = self.text.len(); let mut end_discount = 0; let mut end_bytediscount = 0;
1420 let mut firsttext = true; let mut elder_siblings = SiblingCounter::default();
1423
1424 if let Some(element_config) = self.config.element_config(node, path) {
1426 if self.config.debug {
1427 eprintln!("[STAM fromxml]{} matching config: {:?}", self.debugindent, element_config);
1428 }
1429
1430 if (element_config.stop == Some(false) || element_config.stop.is_none())
1431 && element_config.annotation != XmlAnnotationHandling::TextSelectorBetweenMarkers
1432 {
1433 let whitespace = if node.has_attribute((NS_XML, "space")) {
1436 match node.attribute((NS_XML, "space")).unwrap() {
1438 "preserve" => XmlWhitespaceHandling::Preserve,
1439 "collapse" | "replace" => XmlWhitespaceHandling::Collapse,
1440 _ => whitespace,
1441 }
1442 } else if element_config.whitespace == XmlWhitespaceHandling::Inherit
1443 || element_config.whitespace == XmlWhitespaceHandling::Unspecified
1444 {
1445 whitespace } else {
1447 element_config.whitespace };
1449
1450 self.process_textprefix(element_config, node, resource_id, inputfile, doc_num, &mut begin, &mut bytebegin)?;
1452
1453 let textbegin = self.cursor;
1454 for child in node.children() {
1456 if self.config.debug {
1457 eprintln!("[STAM fromxml]{} child {:?}", self.debugindent, child);
1458 }
1459 if child.is_text() && element_config.text == Some(true) {
1460 let mut innertext = child.text().expect("text node must have text");
1464 let mut pending_whitespace = false;
1465 let mut leading_whitespace = false;
1466 if whitespace == XmlWhitespaceHandling::Collapse && !innertext.is_empty() {
1467 let mut all_whitespace = true;
1469 leading_whitespace = innertext.chars().next().unwrap().is_whitespace();
1470
1471 pending_whitespace = innertext
1474 .chars()
1475 .inspect(|c| {
1476 if !c.is_whitespace() {
1477 all_whitespace = false
1478 }
1479 })
1480 .last()
1481 .unwrap()
1482 .is_whitespace();
1483 if all_whitespace {
1484 self.pending_whitespace = true;
1485 if self.config.debug {
1486 eprintln!(
1487 "[STAM fromxml]{} ^- all whitespace, flag pending whitespace and skipping...",
1488 self.debugindent,
1489 );
1490 }
1491 continue;
1492 }
1493 innertext = innertext.trim();
1494 if self.config.debug {
1495 eprintln!(
1496 "[STAM fromxml]{} ^- collapsed whitespace: {:?}",
1497 self.debugindent,
1498 innertext
1499 );
1500 }
1501 }
1502 if self.pending_whitespace || leading_whitespace {
1503 if !self.text.is_empty()
1505 && !self.text.chars().rev().next().unwrap().is_whitespace()
1506 {
1507 if self.config.debug {
1508 eprintln!("[STAM fromxml]{} ^- outputting pending whitespace",self.debugindent);
1509 }
1510 self.text.push(' ');
1511 self.cursor += 1;
1512 if firsttext && self.pending_whitespace {
1513 begin += 1;
1514 bytebegin += 1;
1515 firsttext = false;
1516 }
1517 }
1518 self.pending_whitespace = false;
1519 }
1520
1521 if whitespace == XmlWhitespaceHandling::Collapse {
1523 let mut prevc = ' ';
1524 let mut innertext = innertext.replace(|c: char| c.is_whitespace(), " ");
1525 innertext.retain(|c| {
1526 let do_retain = c != ' ' || prevc != ' ';
1527 prevc = c;
1528 do_retain
1529 });
1530 self.text += &innertext;
1531 self.cursor += innertext.chars().count();
1532 if self.config.debug {
1533 eprintln!("[STAM fromxml]{} ^- outputting text child (collapsed whitespace), cursor is now {}: {}",self.debugindent, self.cursor, innertext);
1534 }
1535 } else {
1536 self.text += &innertext;
1537 self.cursor += innertext.chars().count();
1538 if self.config.debug {
1539 eprintln!("[STAM fromxml]{} ^- outputting text child, cursor is now {}: {}",self.debugindent, self.cursor, innertext);
1540 }
1541 }
1542 self.pending_whitespace = pending_whitespace;
1543 } else if child.is_element() {
1544 if self.config.debug {
1545 eprintln!("[STAM fromxml]{} \\- extracting text for this child", self.debugindent);
1546 }
1547 self.debugindent.push_str(" ");
1548 let mut path = path.clone();
1550 let count = elder_siblings.count(&child);
1551 path.add(&child, Some(count));
1552 self.extract_element_text(child, &path, whitespace, resource_id, inputfile, doc_num)?;
1553 self.debugindent.pop();
1554 self.debugindent.pop();
1555 } else {
1556 if self.config.debug {
1557 eprintln!("[STAM fromxml]{} ^- skipping this child node", self.debugindent);
1558 }
1559 continue;
1560 }
1561 }
1562
1563 self.process_textsuffix(element_config, node, resource_id, inputfile, doc_num, &mut end_discount, &mut end_bytediscount, textbegin)?;
1565
1566 if let Some(scope_id) = element_config.scope_id.as_ref() {
1568 self.scopes.insert( scope_id.clone(), (doc_num, node.id()) );
1569 }
1570 } else if element_config.annotation == XmlAnnotationHandling::TextSelectorBetweenMarkers
1571 {
1572 if self.config.debug {
1574 eprintln!("[STAM fromxml]{} adding to markers (textprefix={:?}, textsuffix={:?})", self.debugindent, element_config.textprefix, element_config.textsuffix);
1575 }
1576
1577
1578 self.markers
1579 .entry(element_config.hash())
1580 .and_modify(|v| v.push((doc_num, node.id())))
1581 .or_insert(vec![(doc_num, node.id())]);
1582
1583 self.process_textprefix(element_config, node, resource_id, inputfile, doc_num, &mut begin, &mut bytebegin)?;
1586 self.process_textsuffix(element_config, node, resource_id, inputfile, doc_num, &mut end_discount, &mut end_bytediscount, self.cursor)?;
1587 }
1588 } else if self.config.debug {
1589 eprintln!(
1590 "[STAM fromxml]{} WARNING: no match, skipping text extraction for element {}",
1591 self.debugindent,
1592 path
1593 );
1594 }
1595
1596 if begin <= (self.cursor - end_discount) {
1600 let offset = Offset::simple(begin, self.cursor - end_discount);
1601 if self.config.debug {
1602 eprintln!(
1603 "[STAM fromxml]{} extracted text for {} @{:?}: {:?}",
1604 self.debugindent,
1605 path,
1606 &offset,
1607 &self.text[bytebegin..(self.text.len() - end_bytediscount)]
1608 );
1609 }
1610 self.positionmap.insert((doc_num, node.id(), PositionType::Body), offset);
1611 self.bytepositionmap
1612 .insert((doc_num, node.id(), PositionType::Body), (bytebegin, self.text.len() - end_bytediscount));
1613 }
1614 Ok(())
1615 }
1616
1617 fn process_textprefix<'b>(
1619 &mut self,
1620 element_config: &XmlElementConfig,
1621 node: Node<'a,'b>,
1622 resource_id: Option<&str>,
1623 inputfile: Option<&str>,
1624 doc_num: usize,
1625 begin: &mut usize,
1626 bytebegin: &mut usize
1627 ) -> Result<(), XmlConversionError> {
1628 if let Some(textprefix) = &element_config.textprefix {
1629 self.pending_whitespace = false;
1630 if self.config.debug {
1631 eprintln!("[STAM fromxml]{} outputting textprefix: {:?}", self.debugindent, textprefix);
1632 }
1633 let result =
1634 self.render_template(textprefix, &node, Some(self.cursor), None, resource_id, inputfile, doc_num)
1635 .map_err(|e| match e {
1636 XmlConversionError::TemplateError(s, e) => {
1637 XmlConversionError::TemplateError(
1638 format!(
1639 "whilst rendering textprefix template '{}' for node '{}': {}",
1640 textprefix, node.tag_name().name(), s
1641 ),
1642 e,
1643 )
1644 }
1645 e => e,
1646 })?;
1647 let result_charlen = result.chars().count();
1648
1649 if !element_config.annotatetextprefix.is_empty() {
1650 let offset = Offset::simple(self.cursor, self.cursor + result_charlen);
1652 self.positionmap.insert((doc_num, node.id(), PositionType::TextPrefix), offset);
1653 self.bytepositionmap
1654 .insert((doc_num, node.id(), PositionType::TextPrefix), (*bytebegin, *bytebegin + result.len()));
1655 }
1656
1657 self.cursor += result_charlen;
1658 self.text += &result;
1659
1660 if element_config.include_textprefix != Some(true) {
1661 *begin += result_charlen;
1663 *bytebegin += result.len();
1664 }
1665 }
1666 Ok(())
1667 }
1668
1669 fn process_textsuffix<'b>(
1671 &mut self,
1672 element_config: &XmlElementConfig,
1673 node: Node<'a,'b>,
1674 resource_id: Option<&str>,
1675 inputfile: Option<&str>,
1676 doc_num: usize,
1677 end_discount: &mut usize,
1678 end_bytediscount: &mut usize,
1679 textbegin: usize,
1680 ) -> Result<(), XmlConversionError> {
1681 if let Some(textsuffix) = &element_config.textsuffix {
1682 if self.config.debug {
1683 eprintln!("[STAM fromxml]{} outputting textsuffix: {:?}", self.debugindent, textsuffix);
1684 }
1685 let result = self.render_template(
1686 textsuffix.as_str(),
1687 &node,
1688 Some(textbegin),
1689 Some(self.cursor),
1690 resource_id,
1691 inputfile,
1692 doc_num
1693 ).map_err(|e| match e {
1694 XmlConversionError::TemplateError(s, e) => {
1695 XmlConversionError::TemplateError(
1696 format!(
1697 "whilst rendering textsuffix template '{}' for node '{}': {}",
1698 textsuffix,
1699 node.tag_name().name(),
1700 s
1701 ),
1702 e,
1703 )
1704 }
1705 e => e,
1706 })?;
1707 let end_discount_tmp = result.chars().count();
1708 let end_bytediscount_tmp = result.len();
1709
1710
1711 self.text += &result;
1712
1713 if !element_config.annotatetextsuffix.is_empty() {
1714 let offset = Offset::simple(self.cursor, self.cursor + end_discount_tmp);
1716 self.positionmap.insert((doc_num, node.id(), PositionType::TextSuffix), offset);
1717 self.bytepositionmap
1718 .insert((doc_num, node.id(), PositionType::TextSuffix), (self.text.len() - end_bytediscount_tmp, self.text.len()));
1719 }
1720
1721 self.cursor += end_discount_tmp;
1722 self.pending_whitespace = false;
1723
1724 if element_config.include_textsuffix == Some(true) {
1725 *end_discount = 0;
1727 *end_bytediscount = 0;
1728 } else {
1729 *end_discount = end_discount_tmp;
1731 *end_bytediscount = end_bytediscount_tmp;
1732 }
1733 }
1734 Ok(())
1735 }
1736
1737 fn extract_element_annotation<'b>(
1742 &mut self,
1743 node: Node<'a,'b>,
1744 path: &NodePath<'a,'b>,
1745 inputfile: Option<&str>,
1746 doc_num: usize,
1747 store: &mut AnnotationStore,
1748 ) -> Result<(), XmlConversionError> {
1749 if self.config.debug {
1750 eprintln!("[STAM fromxml]{} extracting annotation from {}", self.debugindent, path);
1751 }
1752
1753 let mut elder_siblings = SiblingCounter::default();
1754
1755 if let Some(element_config) = self.config.element_config(node, &path) {
1757 if self.config.debug {
1758 eprintln!("[STAM fromxml]{} matching config: {:?}", self.debugindent, element_config);
1759 }
1760 if element_config.annotation != XmlAnnotationHandling::None
1761 && element_config.annotation != XmlAnnotationHandling::Unspecified
1762 {
1763 let mut builder = AnnotationBuilder::new();
1764
1765 let offset = self.positionmap.get(&(doc_num, node.id(), PositionType::Body));
1767 if element_config.annotation == XmlAnnotationHandling::TextSelector {
1768 if let Some((beginbyte, endbyte)) = self.bytepositionmap.get(&(doc_num, node.id(), PositionType::Body)) {
1769 if self.config.debug {
1770 eprintln!("[STAM fromxml]{} annotation covers text {:?} (bytes {}-{})", self.debugindent, offset, beginbyte, endbyte);
1771 }
1772 } else if self.text.is_empty() {
1773 return Err(XmlConversionError::ConfigError("Can't extract annotations on text if no text was extracted!".into()));
1774 }
1775 }
1776 let begin = if let Some(offset) = offset {
1777 if let Cursor::BeginAligned(begin) = offset.begin {
1778 Some(begin)
1779 } else {
1780 None
1781 }
1782 } else {
1783 None
1784 };
1785 let end = if let Some(offset) = offset {
1786 if let Cursor::BeginAligned(end) = offset.end {
1787 Some(end)
1788 } else {
1789 None
1790 }
1791 } else {
1792 None
1793 };
1794
1795 let resource_id = if let Some(resource_handle) = self.resource_handle {
1796 store.resource(resource_handle).unwrap().id()
1797 } else {
1798 None
1799 };
1800
1801 let mut have_id = false;
1802 if let Some(template) = &element_config.id {
1803 let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1804 let compiled_template = self.template_engine.template(template.as_str());
1805 let id = compiled_template.render(&context).to_string().map_err(|e|
1806 XmlConversionError::TemplateError(
1807 format!(
1808 "whilst rendering id template '{}' for node '{}'",
1809 template,
1810 node.tag_name().name(),
1811 ),
1812 Some(e),
1813 )
1814 )?;
1815 if !id.is_empty() {
1816 builder = builder.with_id(id);
1817 have_id = true;
1818 }
1819 }
1820
1821 if !have_id {
1822 if let Some(resource_id) = resource_id {
1824 builder = builder.with_id(stam::generate_id(&format!("{}-",resource_id), ""));
1825 } else {
1826 builder = builder.with_id(stam::generate_id("", ""));
1827 }
1828 }
1829
1830 builder = self.add_annotationdata_to_builder(element_config.annotationdata.iter(), builder, node.clone(), begin, end, resource_id, inputfile, doc_num)?;
1831
1832
1833 if self.config.provenance && inputfile.is_some() {
1834 let path_string = if let Some(id) = node.attribute((NS_XML,"id")) {
1835 format!("//{}[@xml:id=\"{}\"]", self.get_node_name_for_xpath(&node), id)
1837 } else {
1838 path.format_as_xpath(&self.prefixes)
1840 };
1841 let databuilder = AnnotationDataBuilder::new().with_dataset(CONTEXT_ANNO.into()).with_key("target".into()).with_value(
1842 BTreeMap::from([
1843 ("source".to_string(),inputfile.unwrap().into()),
1844 ("selector".to_string(),
1845 BTreeMap::from([
1846 ("type".to_string(),"XPathSelector".into()),
1847 ("value".to_string(),path_string.into())
1848 ]).into()
1849 )
1850 ]).into()
1851 );
1852 builder = builder.with_data_builder(databuilder);
1853 }
1854
1855
1856 match element_config.annotation {
1858 XmlAnnotationHandling::TextSelector => {
1859 if let Some(selector) = self.textselector(node, doc_num, PositionType::Body) {
1861 builder = builder.with_target(selector);
1862 if self.config.debug {
1863 eprintln!("[STAM fromxml] builder AnnotateText: {:?}", builder);
1864 }
1865 store.annotate(builder)?;
1866 }
1867 if !element_config.annotatetextprefix.is_empty() || !element_config.annotatetextsuffix.is_empty() {
1868 self.annotate_textaffixes(node, element_config, inputfile, doc_num, store)?;
1869 }
1870 }
1871 XmlAnnotationHandling::ResourceSelector => {
1872 builder = builder.with_target(SelectorBuilder::ResourceSelector(
1874 self.resource_handle.into(),
1875 ));
1876 if self.config.debug {
1877 eprintln!("[STAM fromxml] builder AnnotateResource: {:?}", builder);
1878 }
1879 store.annotate(builder)?;
1880 }
1881 XmlAnnotationHandling::TextSelectorBetweenMarkers => {
1882 if let Some(selector) =
1884 self.textselector_for_markers(node, doc_num, store, element_config)
1885 {
1886 builder = builder.with_target(selector);
1887 if self.config.debug {
1888 eprintln!(
1889 "[STAM fromxml] builder TextSelectorBetweenMarkers: {:?}",
1890 builder
1891 );
1892 }
1893 store.annotate(builder)?;
1894 if !element_config.annotatetextprefix.is_empty() || !element_config.annotatetextsuffix.is_empty() {
1895 self.annotate_textaffixes(node, element_config, inputfile, doc_num, store)?;
1896 }
1897 }
1898 }
1899 _ => panic!(
1900 "Invalid annotationhandling: {:?}",
1901 element_config.annotation
1902 ),
1903 }
1904 }
1905
1906 if element_config.stop == Some(false) || element_config.stop.is_none() {
1908 for child in node.children() {
1909 if child.is_element() {
1910 self.debugindent.push_str(" ");
1911 let mut path = path.clone();
1912 let count = elder_siblings.count(&child);
1913 path.add(&child, Some(count));
1914 self.extract_element_annotation(child, &path, inputfile, doc_num, store)?;
1916 self.debugindent.pop();
1917 self.debugindent.pop();
1918 }
1919 }
1920 }
1921 } else {
1922 eprintln!(
1923 "[STAM fromxml]{} WARNING: no match, skipping annotation extraction for element {}",
1924 self.debugindent,
1925 path
1926 );
1927 }
1928 Ok(())
1929 }
1930
1931 fn add_annotationdata_to_builder<'input>(&self, iter: impl Iterator<Item = &'a XmlAnnotationDataConfig>,
1932 mut builder: AnnotationBuilder<'a>,
1933 node: Node<'a, 'input>,
1934 begin: Option<usize>,
1935 end: Option<usize>,
1936 resource_id: Option<&str>,
1937 inputfile: Option<&str>,
1938 doc_num: usize,
1939 ) -> Result<AnnotationBuilder<'a>, XmlConversionError> {
1940 for annotationdata in iter {
1941 let mut databuilder = AnnotationDataBuilder::new();
1942 if let Some(template) = &annotationdata.set {
1943 let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1944 let compiled_template = self.template_engine.template(template.as_str());
1945 let dataset = compiled_template.render(&context).to_string().map_err(|e|
1946 XmlConversionError::TemplateError(
1947 format!(
1948 "whilst rendering annotationdata/dataset template '{}' for node '{}'",
1949 template,
1950 node.tag_name().name(),
1951 ),
1952 Some(e),
1953 )
1954 )?;
1955 if !dataset.is_empty() {
1956 databuilder = databuilder.with_dataset(dataset.into())
1957 }
1958 } else {
1959 databuilder =
1960 databuilder.with_dataset(self.config.default_set.as_str().into());
1961 }
1962 if let Some(template) = &annotationdata.key {
1963 let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1964 let compiled_template = self.template_engine.template(template.as_str());
1965 match compiled_template.render(&context).to_string().map_err(|e|
1966 XmlConversionError::TemplateError(
1967 format!(
1968 "whilst rendering annotationdata/key template '{}' for node '{}'",
1969 template,
1970 node.tag_name().name(),
1971 ),
1972 Some(e),
1973 )
1974 ) {
1975 Ok(key) if !key.is_empty() =>
1976 databuilder = databuilder.with_key(key.into()) ,
1977 Ok(_) if !annotationdata.skip_if_missing => {
1978 return Err(XmlConversionError::TemplateError(
1979 format!(
1980 "whilst rendering annotationdata/key template '{}' for node '{}'",
1981 template,
1982 node.tag_name().name(),
1983 ),
1984 None
1985 ));
1986 },
1987 Err(e) if !annotationdata.skip_if_missing => {
1988 return Err(e)
1989 },
1990 _ => {
1991 continue
1993 }
1994 }
1995 }
1996 if let Some(value) = &annotationdata.value {
1997 match self.extract_value(value, node, annotationdata.allow_empty_value, annotationdata.skip_if_missing, annotationdata.valuetype.as_ref().map(|s| s.as_str()), begin, end, resource_id, inputfile, doc_num)? {
1998 Some(DataValue::List(values)) if annotationdata.multiple => {
1999 for value in values {
2000 let mut databuilder_multi = databuilder.clone();
2001 databuilder_multi = databuilder_multi.with_value(value);
2002 builder = builder.with_data_builder(databuilder_multi);
2003 }
2004 },
2005 Some(value) => {
2006 databuilder = databuilder.with_value(value);
2007 },
2008 None => {
2009 continue
2011 }
2012 }
2013 }
2014 if !annotationdata.multiple {
2015 builder = builder.with_data_builder(databuilder);
2016 }
2017 }
2018 Ok(builder)
2019 }
2020
2021 fn annotate_textaffixes<'b>(
2023 &mut self,
2024 node: Node<'a,'b>,
2025 element_config: &XmlElementConfig,
2026 inputfile: Option<&str>,
2027 doc_num: usize,
2028 store: &mut AnnotationStore,
2029 ) -> Result<(), XmlConversionError> {
2030
2031
2032 if !element_config.annotatetextprefix.is_empty() {
2033 let mut builder = AnnotationBuilder::new().with_id(stam::generate_id("textprefix-", ""));
2034 if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), PositionType::TextPrefix)) {
2035 let begin = if let Cursor::BeginAligned(begin) = offset.begin {
2036 Some(begin)
2037 } else {
2038 None
2039 };
2040 let end = if let Cursor::BeginAligned(end) = offset.end {
2041 Some(end)
2042 } else {
2043 None
2044 };
2045 builder = self.add_annotationdata_to_builder(element_config.annotatetextprefix.iter(), builder, node.clone(), begin,end, None, inputfile, doc_num)?; if let Some(selector) = self.textselector(node, doc_num, PositionType::TextPrefix) {
2047 builder = builder.with_target(selector);
2048 if self.config.debug {
2049 eprintln!("[STAM fromxml] builder AnnotateText: {:?}", builder);
2050 }
2051 store.annotate(builder)?;
2052 } else {
2053 return Err(XmlConversionError::ConfigError("Failed to create textselector to target textprefix".into()));
2054 }
2055 }
2056 }
2057
2058 if !element_config.annotatetextsuffix.is_empty() {
2059 let mut builder = AnnotationBuilder::new().with_id(stam::generate_id("textsuffix-", ""));
2060 if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), PositionType::TextSuffix)) {
2061 let begin = if let Cursor::BeginAligned(begin) = offset.begin {
2062 Some(begin)
2063 } else {
2064 None
2065 };
2066 let end = if let Cursor::BeginAligned(end) = offset.end {
2067 Some(end)
2068 } else {
2069 None
2070 };
2071 builder = self.add_annotationdata_to_builder(element_config.annotatetextsuffix.iter(), builder, node.clone(), begin,end, None, inputfile, doc_num)?; if let Some(selector) = self.textselector(node, doc_num, PositionType::TextSuffix) {
2073 builder = builder.with_target(selector);
2074 if self.config.debug {
2075 eprintln!("[STAM fromxml] builder AnnotateText: {:?}", builder);
2076 }
2077 store.annotate(builder)?;
2078 } else {
2079 return Err(XmlConversionError::ConfigError("Failed to create textselector to target textprefix".into()));
2080 }
2081 }
2082 }
2083 Ok(())
2084 }
2085
2086 fn extract_value<'b>(&self, value: &'a toml::Value, node: Node<'a,'b>, allow_empty_value: bool, skip_if_missing: bool, valuetype: Option<&str>, begin: Option<usize>, end: Option<usize>, resource_id: Option<&str>, inputfile: Option<&str>, doc_num: usize) -> Result<Option<DataValue>, XmlConversionError>{
2088 match value {
2089 toml::Value::String(template) => {
2090 let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
2091 let compiled_template = self.template_engine.template(template.as_str()); match compiled_template.render(&context).to_string().map_err(|e|
2103 XmlConversionError::TemplateError(
2104 format!(
2105 "whilst rendering annotationdata/map template '{}' for node '{}'.{}",
2106 template,
2107 node.tag_name().name(),
2108 if self.config.debug() {
2109 format!("\nContext was {:?}.\nVariables are: {:?}", context, self.variables.get(template))
2110 } else {
2111 String::new()
2112 }
2113 ),
2114 Some(e),
2115 )
2116 ) {
2117 Ok(value) => {
2118 if !value.is_empty() || allow_empty_value {
2119 string_to_datavalue(value, valuetype).map(|v| Some(v))
2120 } else {
2121 Ok(None)
2123 }
2124 },
2125 Err(e) if !skip_if_missing => {
2126 Err(e)
2127 },
2128 Err(_) if allow_empty_value => {
2129 Ok(Some("".into()))
2130 },
2131 Err(_) => {
2132 Ok(None)
2134 }
2135 }
2136 },
2137 toml::Value::Table(map) => {
2138 let mut resultmap: BTreeMap<String,DataValue> = BTreeMap::new();
2139 for (key, value) in map.iter() {
2140 if let Some(value) = self.extract_value(value, node, false, true, None, begin, end, resource_id, inputfile, doc_num)? {
2141 resultmap.insert(key.clone(), value);
2142 }
2143 }
2144 Ok(Some(resultmap.into()))
2145 },
2146 toml::Value::Array(list) => {
2147 let mut resultlist: Vec<DataValue> = Vec::new();
2148 for value in list.iter() {
2149 if let Some(value) = self.extract_value(value, node, false, true, None, begin, end, resource_id, inputfile, doc_num)? {
2150 resultlist.push(value);
2151 }
2152 }
2153 Ok(Some(resultlist.into()))
2154 }
2155 toml::Value::Boolean(v) => Ok(Some(DataValue::Bool(*v))),
2156 toml::Value::Float(v) => Ok(Some(DataValue::Float(*v))),
2157 toml::Value::Integer(v) => Ok(Some(DataValue::Int(*v as isize))),
2158 toml::Value::Datetime(_v) => {
2159 todo!("fromxml: Datetime conversion not implemented yet");
2160 }
2161 }
2162 }
2163
2164 fn extract_value_metadata<'b>(&self, value: &'a toml::Value, context: &upon::Value, allow_empty_value: bool, skip_if_missing: bool, resource_id: Option<&str>) -> Result<Option<DataValue>, XmlConversionError>{
2166 match value {
2167 toml::Value::String(template) => {
2168 let compiled_template = self.template_engine.template(template.as_str()); match compiled_template.render(&context).to_string().map_err(|e|
2170 XmlConversionError::TemplateError(
2171 format!(
2172 "whilst rendering annotationdata/metadata template '{}' for metadata",
2173 template,
2174 ),
2175 Some(e),
2176 )
2177 ) {
2178 Ok(value) => {
2179 if !value.is_empty() || allow_empty_value {
2180 Ok(Some(value.into()))
2181 } else {
2182 Ok(None)
2184 }
2185 },
2186 Err(e) if !skip_if_missing => {
2187 Err(e)
2188 },
2189 Err(_) if allow_empty_value => {
2190 Ok(Some("".into()))
2191 },
2192 Err(_) => {
2193 Ok(None)
2195 }
2196 }
2197 },
2198 toml::Value::Table(map) => {
2199 let mut resultmap: BTreeMap<String,DataValue> = BTreeMap::new();
2200 for (key, value) in map.iter() {
2201 if let Some(value) = self.extract_value_metadata(value, context, false, true, resource_id)? {
2202 resultmap.insert(key.clone(), value);
2203 }
2204 }
2205 Ok(Some(resultmap.into()))
2206 },
2207 toml::Value::Array(list) => {
2208 let mut resultlist: Vec<DataValue> = Vec::new();
2209 for value in list.iter() {
2210 if let Some(value) = self.extract_value_metadata(value, context, false, true, resource_id)? {
2211 resultlist.push(value);
2212 }
2213 }
2214 Ok(Some(resultlist.into()))
2215 }
2216 toml::Value::Boolean(v) => Ok(Some(DataValue::Bool(*v))),
2217 toml::Value::Float(v) => Ok(Some(DataValue::Float(*v))),
2218 toml::Value::Integer(v) => Ok(Some(DataValue::Int(*v as isize))),
2219 toml::Value::Datetime(_v) => {
2220 todo!("fromxml: Datetime conversion not implemented yet");
2221 }
2222 }
2223 }
2224
2225 fn textselector<'s>(&'s self, node: Node, doc_num: usize, positiontype: PositionType) -> Option<SelectorBuilder<'s>> {
2227 let res_handle = self.resource_handle.expect("resource must be associated");
2228 if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), positiontype)) {
2229 Some(SelectorBuilder::TextSelector(
2230 BuildItem::Handle(res_handle),
2231 offset.clone(),
2232 ))
2233 } else {
2234 None
2235 }
2236 }
2237
2238 fn textselector_for_markers<'b>(
2240 &self,
2241 node: Node,
2242 doc_num: usize,
2243 store: &AnnotationStore,
2244 element_config: &'b XmlElementConfig,
2245 ) -> Option<SelectorBuilder<'b>> {
2246 let resource = store
2247 .resource(
2248 self.resource_handle
2249 .expect("resource must have been created"),
2250 )
2251 .expect("resource must exist");
2252 let mut end: Option<usize> = None;
2253 if let Some(markers) = self.markers.get(&element_config.hash()) {
2254 let mut grab = false;
2255 for (d_num, n_id) in markers.iter() {
2256 if grab {
2257 end = self.positionmap.get(&(*d_num, *n_id, PositionType::Body)).map(|offset| {
2259 offset
2260 .begin
2261 .try_into()
2262 .expect("begin cursor must be beginaligned")
2263 });
2264 break;
2265 }
2266 if doc_num == *d_num && *n_id == node.id() {
2267 grab = true;
2269 }
2270 }
2271 };
2272 if end.is_none() {
2273 if let Some(scope) = element_config.marker_scope.as_deref() {
2276 if let Some((d_num, n_id)) = self.scopes.get(scope) {
2277 end = self.positionmap.get(&(*d_num, *n_id, PositionType::Body)).map(|offset| {
2278 offset
2279 .end
2280 .try_into()
2281 .expect("end cursor must be beginaligned")
2282 });
2283 } else {
2284 eprintln!("WARNING: Undefined scope referenced in marker_scope: {}, no matching text with this `scope_id` in this document! Skipping last marker!", scope);
2285 return None;
2286 }
2287 } else {
2288 end = Some(resource.textlen());
2290 }
2291 }
2292 if let (Some(offset), Some(end)) = (self.positionmap.get(&(doc_num, node.id(), PositionType::Body)), end) {
2293 Some(SelectorBuilder::TextSelector(
2294 BuildItem::Handle(self.resource_handle.unwrap()),
2295 Offset::simple(
2296 offset
2297 .begin
2298 .try_into()
2299 .expect("begin cursor must be beginaligned"),
2300 end,
2301 ),
2302 ))
2303 } else {
2304 None
2305 }
2306 }
2307
2308 fn set_global_context(&mut self) {
2309 self.global_context
2310 .insert("context".into(), upon::Value::Map(self.config.context.iter().map(|(k,v)| (k.clone(), map_value(v))).collect()));
2311 self.global_context
2312 .insert("namespaces".into(), self.config.namespaces.clone().into());
2313 self.global_context
2314 .insert("default_set".into(), self.config.default_set.clone().into());
2315 }
2316
2317 fn render_template<'input, 't>(
2318 &self,
2319 template: &'t str,
2320 node: &Node<'a, 'input>,
2321 begin: Option<usize>,
2322 end: Option<usize>,
2323 resource: Option<&str>,
2324 inputfile: Option<&str>,
2325 doc_num: usize,
2326 ) -> Result<Cow<'t, str>, XmlConversionError> {
2327 if template.chars().any(|c| c == '{') {
2328 let compiled_template = self.template_engine.template(template);
2330 let context = self.context_for_node(&node, begin, end, template, resource, inputfile, doc_num);
2331 let result = compiled_template.render(context).to_string()?;
2332 Ok(Cow::Owned(result))
2333 } else {
2334 Ok(Cow::Borrowed(template))
2336 }
2337 }
2338
2339 fn context_for_node<'input>(
2340 &self,
2341 node: &Node<'a, 'input>,
2342 begin: Option<usize>,
2343 end: Option<usize>,
2344 template: &str,
2345 resource: Option<&str>,
2346 inputfile: Option<&str>,
2347 doc_num: usize,
2348 ) -> upon::Value {
2349 let mut context = self.global_context.clone();
2350 let length = if let (Some(begin), Some(end)) = (begin, end) {
2351 Some(end - begin)
2352 } else {
2353 None
2354 };
2355 context.insert("localname".into(), node.tag_name().name().into());
2356 context.insert("name".into(), self.get_node_name_for_template(node).into());
2358 if let Some(namespace) = node.tag_name().namespace() {
2359 context.insert("namespace".into(), namespace.into());
2361 }
2362
2363 if let Some(begin) = begin {
2365 context.insert("begin".into(), upon::Value::Integer(begin as i64));
2366 }
2367 if let Some(end) = end {
2368 context.insert("end".into(), upon::Value::Integer(end as i64));
2369 }
2370 if let Some(length) = length {
2371 context.insert("length".into(), upon::Value::Integer(length as i64));
2372 }
2373 if let Some(resource) = resource {
2374 context.insert("resource".into(), resource.into());
2376 }
2377 if let Some(inputfile) = inputfile {
2378 context.insert("inputfile".into(), inputfile.into());
2380 }
2381 context.insert("doc_num".into(), upon::Value::Integer(doc_num as i64));
2383
2384 if let Some(vars) = self.variables.get(template) {
2385 for var in vars {
2386 let mut encodedvar = String::new();
2387 if let Some(value) = self.context_for_var(node, var, &mut encodedvar, false) {
2388 if self.config.debug() {
2389 eprintln!(
2390 "[STAM fromxml] Set context variable for template '{}' for node '{}': {}={:?} (encodedvar={})",
2391 template,
2392 node.tag_name().name(),
2393 var,
2394 value,
2395 encodedvar
2396 );
2397 }
2398 if value != upon::Value::None {
2399 context.insert(encodedvar, value);
2400 }
2401 } else if self.config.debug() {
2402 eprintln!(
2403 "[STAM fromxml] Missed context variable for template '{}' for node '{}': {}",
2404 template,
2405 node.tag_name().name(),
2406 var
2407 );
2408 }
2409 }
2410 }
2411 upon::Value::Map(context)
2412 }
2413
2414 fn context_for_var<'input>(
2418 &self,
2419 node: &Node<'a, 'input>,
2420 var: &str,
2421 path: &mut String,
2422 mut return_all_matches: bool,
2423 ) -> Option<upon::Value> {
2424
2425 let first = path.is_empty();
2427
2428 let var = if var.starts_with("?.$$") {
2429 if first {
2430 path.push_str("?.ELEMENTS_");
2431 return_all_matches = true;
2432 if self.config.debug {
2433 eprintln!("[STAM fromxml] will return all matches for {}", var);
2434 }
2435 };
2436 &var[4..]
2437 } else if var.starts_with("?.$") {
2438 if first {
2439 path.push_str("?.ELEMENT_");
2440 };
2441 &var[3..]
2442 } else if var.starts_with("$$") {
2443 if first {
2444 path.push_str("ELEMENTS_");
2445 return_all_matches = true;
2446 if self.config.debug {
2447 eprintln!("[STAM fromxml] will return all matches for {}", var);
2448 }
2449 };
2450 &var[2..]
2451 } else if var.starts_with("$") {
2452 if first {
2453 path.push_str("ELEMENT_");
2454 };
2455 &var[1..]
2456 } else if var.starts_with("?.@") {
2457 if first {
2458 path.push_str("?.");
2459 };
2460 &var[2..]
2461 } else {
2462 var
2463 };
2464
2465 if !first && !var.is_empty() && !path.ends_with("ELEMENT_") && !path.ends_with("ELEMENTS_"){
2466 path.push_str("_IN_");
2467 }
2468
2469 let (component, remainder) = var.split_once("/").unwrap_or((var,""));
2471 if component.is_empty() {
2473 if first && !remainder.is_empty() {
2474 let mut n = node.clone();
2476 while let Some(parentnode) = n.parent_element() {
2478 n = parentnode;
2479 }
2480 let (rootcomponent, remainder) = remainder.split_once("/").unwrap_or((remainder,""));
2482 let (prefix, localname) = if let Some(pos) = rootcomponent.find(":") {
2483 (Some(&rootcomponent[0..pos]), &rootcomponent[pos+1..])
2484 } else {
2485 (None, rootcomponent)
2486 };
2487 if localname != n.tag_name().name() && localname != "*" {
2489 None
2490 } else {
2491 if let Some(prefix) = prefix {
2492 path.push_str(prefix);
2493 path.push_str("__");
2494 }
2495 path.push_str(localname);
2496 self.context_for_var(&n, remainder, path, return_all_matches)
2497 }
2498 } else {
2499 Some(recursive_text(node).into())
2502 }
2503 } else if component.starts_with("@"){
2504 if let Some(pos) = component.find(":") {
2505 let prefix = &component[1..pos];
2506 if let Some(ns) = self.config.namespaces.get(prefix) {
2507 let var = &component[pos+1..];
2508 path.push_str("ATTRIB_");
2509 path.push_str(prefix);
2510 path.push_str("__");
2511 path.push_str(var);
2512 Some(
2513 node.attribute((ns.as_str(),var)).into()
2514 )
2515 } else {
2516 None
2517 }
2518 } else {
2519 let var = &component[1..];
2520 path.push_str("ATTRIB_");
2521 path.push_str(var);
2522 Some(
2523 node.attribute(var).into()
2524 )
2525 }
2526 } else if component == ".." {
2527 if let Some(parentnode) = node.parent_element().as_ref() {
2528 path.push_str("PARENT");
2530 self.context_for_var(parentnode, remainder, path, return_all_matches)
2531 } else {
2532 None
2533 }
2534 } else if component == "." {
2535 path.push_str("THIS");
2536 if !remainder.is_empty() {
2537 self.context_for_var(node, remainder, path, return_all_matches)
2539 } else {
2540 Some(recursive_text(node).into())
2541 }
2542 } else {
2543 let (prefix, localname) = if let Some(pos) = component.find(":") {
2544 (Some(&component[0..pos]), &component[pos+1..])
2545 } else {
2546 (None, component)
2547 };
2548 let localname_with_condition = localname;
2549 let (localname, condition_str, condition) = self.extract_condition(localname_with_condition); let mut multiple_value_buffer: Vec<upon::Value> = Vec::new(); let mut final_path: String = String::new(); for child in node.children() {
2554 if child.is_element() {
2555 let namedata = child.tag_name();
2556 let mut child_matches = if let Some(namespace) = namedata.namespace() {
2557 if let Some(foundprefix) = self.prefixes.get(namespace) {
2558 Some(foundprefix.as_str()) == prefix && localname == namedata.name()
2559 } else {
2560 false
2561 }
2562 } else {
2563 namedata.name() == localname
2564 };
2565 if child_matches {
2566 if let Some((attribname, negate, attribvalue)) = condition {
2568 if let Some(pos) = attribname.find(":") {
2570 let prefix = &attribname[0..pos];
2571 if let Some(ns) = self.config.namespaces.get(prefix) {
2572 let attribname = &attribname[pos+1..];
2573 if let Some(value) = child.attribute((ns.as_str(),attribname)) {
2574 if !negate && attribvalue != Some(value) {
2575 child_matches = false;
2576 } else if negate && attribvalue == Some(value) {
2577 child_matches = false;
2578 }
2579 } else {
2580 child_matches = false;
2581 }
2582 } else {
2583 child_matches = false;
2584 }
2585 } else {
2586 if let Some(value) = child.attribute(attribname) {
2587 if !negate && attribvalue != Some(value) {
2588 child_matches = false;
2589 } else if negate && attribvalue == Some(value) {
2590 child_matches = false;
2591 }
2592 } else {
2593 child_matches = false;
2594 }
2595 }
2596 }
2597 if !child_matches && self.config.debug {
2598 eprintln!("[STAM fromxml] candidate node does not meet condition: {}", localname_with_condition);
2599 }
2600 }
2602 if child_matches {
2603 let prevpathlen = path.len();
2604 if let Some(prefix) = prefix {
2606 path.push_str(prefix);
2607 path.push_str("__");
2608 }
2609 path.push_str(localname);
2610 if condition.is_some() {
2611 let mut hasher = DefaultHasher::new();
2613 condition_str.hash(&mut hasher);
2614 let h = hasher.finish();
2615 path.push_str(&format!("_COND{}_", h));
2616 }
2617 if let Some(value) = self.context_for_var(&child, remainder, path, return_all_matches) {
2618 if return_all_matches {
2620 if let upon::Value::List(v) = value {
2621 multiple_value_buffer.extend(v.into_iter());
2622 } else {
2623 multiple_value_buffer.push(value);
2624 }
2625 if final_path.is_empty() {
2626 final_path = path.clone();
2627 }
2628 } else {
2630 return Some(value);
2632 }
2633 }
2634 path.truncate(prevpathlen);
2636 }
2637 }
2638 }
2639 if !multiple_value_buffer.is_empty() {
2640 if self.config.debug {
2642 eprintln!("[STAM fromxml] returning multiple matches of {} as list", var);
2643 }
2644 *path = final_path;
2646 Some(multiple_value_buffer.into())
2647 } else {
2648 if self.config.debug {
2650 eprintln!("[STAM fromxml] returning with no match found for {} in {}", var, node.tag_name().name());
2651 }
2652 None
2653 }
2654 }
2655 }
2656
2657 fn extract_condition<'b>(&self, localname: &'b str) -> (&'b str, &'b str, Option<(&'b str, bool, Option<&'b str>)>) { if localname.ends_with("]") {
2660 if let Some(pos) = localname.find("[") {
2661 let condition = &localname[pos+1..localname.len()-1];
2662 let (mut attrib, negation, attribvalue) = if let Some(pos) = condition.find("=") {
2663 let attrib = condition[0..pos].trim();
2664 let value = condition[pos+1..].trim();
2665 let value = &value[1..value.len() - 1]; if attrib.ends_with('!') {
2667 (attrib[..attrib.len() - 1].trim(), true, Some(value))
2669 } else {
2670 (attrib.trim(), false, Some(value))
2671 }
2672 } else {
2673 (condition, false, None)
2674 };
2675 if attrib.starts_with('@') {
2676 attrib = &attrib[1..];
2678 }
2679 return (&localname[..pos], condition, Some((attrib, negation,attribvalue )) );
2680 }
2681 }
2682 (localname, "", None)
2683 }
2684
2685
2686 fn get_node_name_for_template<'b>(&self, node: &'b Node) -> Cow<'b,str> {
2687 let extended_name = node.tag_name();
2688 match (extended_name.namespace(), extended_name.name()) {
2689 (Some(namespace), tagname) => {
2690 if let Some(prefix) = self.prefixes.get(namespace) {
2691 Cow::Owned(format!("{}__{}", prefix, tagname))
2692 } else {
2693 Cow::Borrowed(tagname)
2694 }
2695 }
2696 (None, tagname) => Cow::Borrowed(tagname),
2697 }
2698 }
2699
2700 fn get_node_name_for_xpath<'b>(&self, node: &'b Node) -> Cow<'b,str> {
2701 let extended_name = node.tag_name();
2702 match (extended_name.namespace(), extended_name.name()) {
2703 (Some(namespace), tagname) => {
2704 if let Some(prefix) = self.prefixes.get(namespace) {
2705 Cow::Owned(format!("{}:{}", prefix, tagname))
2706 } else {
2707 Cow::Borrowed(tagname)
2708 }
2709 }
2710 (None, tagname) => Cow::Borrowed(tagname),
2711 }
2712 }
2713
2714
2715 fn precompile(&mut self, template: &'a str) -> Cow<'a,str> {
2716 let mut replacement = String::new();
2717 let mut variables: BTreeSet<&'a str> = BTreeSet::new();
2718 let mut begin = 0;
2719 let mut end = 0;
2720 for i in 0..template.len() {
2721 let slice = &template[i..];
2722 if slice.starts_with("{{") || slice.starts_with("{%") {
2723 begin = i;
2724 } else if slice.starts_with("}}") || slice.starts_with("%}") {
2725 if end < begin+2 {
2726 replacement.push_str(&template[end..begin+2]);
2727 }
2728 let inner = &template[begin+2..i]; replacement.push_str(&self.precompile_inblock(inner, &mut variables));
2730 end = i;
2731 }
2732 }
2733 if end > 0 {
2734 replacement.push_str(&template[end..]);
2735 }
2736 self.variables.insert(template.into(), variables);
2737 if !replacement.is_empty() {
2740 Cow::Owned(replacement)
2741 } else {
2742 Cow::Borrowed(template)
2743 }
2744 }
2745
2746 fn precompile_inblock<'s>(&self, s: &'s str, vars: &mut BTreeSet<&'s str>) -> Cow<'s,str> {
2747 let mut quoted = false;
2748 let mut var = false;
2749 let mut begin = 0;
2750 let mut end = 0;
2751 let mut replacement = String::new();
2752 let mut in_condition = false;
2753 for (i,c) in s.char_indices() {
2754 if in_condition && c != ']' {
2755 continue;
2756 }
2757 if c == '"' {
2758 quoted = !quoted;
2759 } else if !quoted {
2760 if !var && (c == '@' || c == '$') {
2761 var = true;
2763 begin = i;
2764 } else if var && c == '[' {
2765 in_condition = true;
2766 } else if var && in_condition && c == ']' {
2767 in_condition = false;
2769 } else if var && in_condition {
2770 continue;
2772 } else if var && (!c.is_alphanumeric() && c != '$' && c != '.' && c != '/' && c != '_' && c != ':' && c != '@') {
2773 if end < begin {
2775 replacement.push_str(&s[end..begin]);
2776 }
2777 let varname = &s[begin..i];
2778 vars.insert(varname);
2779 let replacement_var = self.precompile_name(varname);
2780 replacement += &replacement_var;
2781 end = i;
2782 var = false;
2783 }
2784 }
2785 }
2786 if end > 0 {
2787 replacement.push_str(&s[end..]);
2788 }
2789 if var {
2790 let varname = &s[begin..];
2792 vars.insert(varname);
2793 let replacement_var = self.precompile_name(varname);
2794 replacement += &replacement_var;
2795 }
2796 if !replacement.is_empty() {
2797 Cow::Owned(replacement)
2799 } else {
2800 Cow::Borrowed(s)
2801 }
2802 }
2803
2804 fn precompile_name(&self, s: &str) -> String {
2806 let mut replacement = String::new();
2807 let mut begincondition = None;
2808 let mut skip = 0;
2809 for (i,c) in s.char_indices() {
2810 if begincondition.is_some() && c != ']' {
2811 continue;
2812 } else if skip > 0 {
2813 skip -= 1;
2814 continue;
2815 }
2816 if c == '$' {
2817 let slice = &s[i..];
2818 if slice.starts_with("$$..") {
2819 replacement.push_str("ELEMENTS_PARENT");
2820 skip = 3;
2821 } else if slice.starts_with("$$.") {
2822 replacement.push_str("ELEMENTS_THIS");
2823 skip = 2;
2824 } else if slice.starts_with("$$/") {
2825 replacement.push_str("ELEMENTS_");
2826 skip = 2;
2827 } else if slice.starts_with("$$") {
2828 replacement.push_str("ELEMENTS_");
2829 skip = 1;
2830 } else if slice.starts_with("$..") {
2831 replacement.push_str("ELEMENT_PARENT");
2832 skip = 2;
2833 } else if slice.starts_with("$.") {
2834 replacement.push_str("ELEMENT_THIS");
2835 skip = 1;
2836 } else if slice.starts_with("$/") {
2837 replacement.push_str("ELEMENT_");
2838 skip = 1;
2839 } else {
2840 replacement.push_str("ELEMENT_");
2841 }
2842 } else if c == '@' {
2843 replacement.push_str("ATTRIB_");
2844 } else if c == '/' {
2845 replacement.push_str("_IN_");
2846 } else if c == ':' {
2847 replacement.push_str("__");
2848 } else if c == '[' {
2849 begincondition = Some(i+1);
2850 } else if c == ']' {
2851 if let Some(begin) = begincondition {
2853 let mut hasher = DefaultHasher::new();
2854 let _ = &s[begin..i].hash(&mut hasher);
2855 let h = hasher.finish();
2856 replacement.push_str(&format!("_COND{}_", h));
2857 }
2858 begincondition = None;
2859 } else {
2860 replacement.push(c);
2861 }
2862 }
2863 replacement
2865 }
2866
2867 fn add_metadata(&self, store: &mut AnnotationStore) -> Result<(), XmlConversionError> {
2868 for metadata in self.config.metadata.iter() {
2869 let mut builder = AnnotationBuilder::new();
2870
2871 let resource_id = if let Some(resource_handle) = self.resource_handle {
2872 store.resource(resource_handle).unwrap().id()
2873 } else {
2874 None
2875 };
2876
2877 let mut context = self.global_context.clone();
2878 if let Some(resource_id) = resource_id {
2879 context.insert("resource".into(), resource_id.into());
2880 }
2881
2882 if let Some(template) = &metadata.id {
2883 let compiled_template = self.template_engine.template(template.as_str());
2884 let id = compiled_template.render(&context).to_string().map_err(|e|
2885 XmlConversionError::TemplateError(
2886 format!(
2887 "whilst rendering metadata id template '{}'",
2888 template,
2889 ),
2890 Some(e),
2891 )
2892 )?;
2893 if !id.is_empty() {
2894 builder = builder.with_id(id);
2895 }
2896 }
2897
2898 for annotationdata in metadata.annotationdata.iter() {
2899 let mut databuilder = AnnotationDataBuilder::new();
2900 if let Some(template) = &annotationdata.set {
2901 let compiled_template = self.template_engine.template(template.as_str());
2902 let dataset = compiled_template.render(&context).to_string().map_err(|e|
2903 XmlConversionError::TemplateError(
2904 format!(
2905 "whilst rendering annotationdata/dataset template '{}' for metadata",
2906 template,
2907 ),
2908 Some(e),
2909 )
2910 )?;
2911 if !dataset.is_empty() {
2912 databuilder = databuilder.with_dataset(dataset.into())
2913 }
2914 } else {
2915 databuilder =
2916 databuilder.with_dataset(self.config.default_set.as_str().into());
2917 }
2918 if let Some(template) = &annotationdata.key {
2919 let compiled_template = self.template_engine.template(template.as_str());
2920 match compiled_template.render(&context).to_string().map_err(|e|
2921 XmlConversionError::TemplateError(
2922 format!(
2923 "whilst rendering annotationdata/key template '{}' for metadata",
2924 template,
2925 ),
2926 Some(e),
2927 )
2928 ) {
2929 Ok(key) if !key.is_empty() =>
2930 databuilder = databuilder.with_key(key.into()) ,
2931 Ok(_) if !annotationdata.skip_if_missing => {
2932 return Err(XmlConversionError::TemplateError(
2933 format!(
2934 "whilst rendering annotationdata/key template '{}' metadata",
2935 template,
2936 ),
2937 None
2938 ));
2939 },
2940 Err(e) if !annotationdata.skip_if_missing => {
2941 return Err(e)
2942 },
2943 _ => {
2944 continue
2946 }
2947 }
2948 }
2949 if let Some(value) = &annotationdata.value {
2950 match self.extract_value_metadata(value, &upon::Value::Map(context.clone()), annotationdata.allow_empty_value, annotationdata.skip_if_missing, resource_id.as_deref())? {
2951 Some(value) => {
2952 databuilder = databuilder.with_value(value);
2953 },
2954 None => {
2955 continue
2957 }
2958 }
2959 }
2960 builder = builder.with_data_builder(databuilder);
2961 }
2962
2963
2964
2965 match metadata.annotation {
2967 XmlAnnotationHandling::TextSelector => {
2968 builder = builder.with_target(SelectorBuilder::TextSelector(BuildItem::Handle(self.resource_handle.expect("resource must have handle")), Offset::whole()));
2970 if self.config.debug {
2971 eprintln!("[STAM fromxml] builder AnnotateText: {:?}", builder);
2972 }
2973 store.annotate(builder)?;
2974 }
2975 XmlAnnotationHandling::ResourceSelector | XmlAnnotationHandling::None | XmlAnnotationHandling::Unspecified => {
2976 builder = builder.with_target(SelectorBuilder::ResourceSelector(
2978 self.resource_handle.into(),
2979 ));
2980 if self.config.debug {
2981 eprintln!("[STAM fromxml] builder AnnotateResource: {:?}", builder);
2982 }
2983 store.annotate(builder)?;
2984 }
2985 _ => panic!(
2986 "Invalid annotationhandling for metadata: {:?}",
2987 metadata.annotation
2988 ),
2989 }
2990 }
2991 Ok(())
2992 }
2993}
2994
2995
2996
2997fn recursive_text(node: &Node) -> String {
2999 let mut s = String::new();
3000 for child in node.children() {
3001 if child.is_text() {
3002 s += child.text().expect("should have text");
3003 } else if child.is_element() {
3004 s += &recursive_text(&child);
3005 }
3006 }
3007 s
3008}
3009
/// Returns `s` with its first character converted to uppercase; the remainder is left
/// untouched. The uppercase form of a character may consist of several characters.
fn filter_capitalize(s: &str) -> String {
    let mut capitalized = String::with_capacity(s.len());
    let mut rest = s.chars();
    if let Some(initial) = rest.next() {
        capitalized.extend(initial.to_uppercase());
        capitalized.push_str(rest.as_str());
    }
    capitalized
}
3022
3023fn filter_gt(a: &upon::Value, b: &upon::Value) -> bool {
3024 match (a, b) {
3025 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a > *b,
3026 (upon::Value::Float(a), upon::Value::Float(b)) => *a > *b,
3027 (upon::Value::String(a), upon::Value::String(b)) => *a > *b,
3028 _ => false,
3029 }
3030}
3031
3032fn filter_lt(a: &upon::Value, b: &upon::Value) -> bool {
3033 match (a, b) {
3034 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a < *b,
3035 (upon::Value::Float(a), upon::Value::Float(b)) => *a < *b,
3036 (upon::Value::String(a), upon::Value::String(b)) => *a < *b,
3037 _ => false,
3038 }
3039}
3040
3041fn filter_gte(a: &upon::Value, b: &upon::Value) -> bool {
3042 match (a, b) {
3043 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a >= *b,
3044 (upon::Value::Float(a), upon::Value::Float(b)) => *a >= *b,
3045 (upon::Value::String(a), upon::Value::String(b)) => *a >= *b,
3046 _ => false,
3047 }
3048}
3049
3050fn filter_lte(a: &upon::Value, b: &upon::Value) -> bool {
3051 match (a, b) {
3052 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a <= *b,
3053 (upon::Value::Float(a), upon::Value::Float(b)) => *a <= *b,
3054 (upon::Value::String(a), upon::Value::String(b)) => *a <= *b,
3055 _ => false,
3056 }
3057}
3058
3059fn filter_add(a: &upon::Value, b: &upon::Value) -> upon::Value {
3060 match (a, b) {
3061 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a + b),
3062 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a + b),
3063 (upon::Value::String(a), upon::Value::String(b)) => upon::Value::String(a.clone() + b),
3064 _ => upon::Value::None,
3065 }
3066}
3067
3068fn filter_sub(a: &upon::Value, b: &upon::Value) -> upon::Value {
3069 match (a, b) {
3070 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a - b),
3071 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a - b),
3072 _ => upon::Value::None,
3073 }
3074}
3075
3076fn filter_mul(a: &upon::Value, b: &upon::Value) -> upon::Value {
3077 match (a, b) {
3078 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a * b),
3079 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a * b),
3080 _ => upon::Value::None,
3081 }
3082}
3083
3084fn filter_div(a: &upon::Value, b: &upon::Value) -> upon::Value {
3085 match (a, b) {
3086 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a / b),
3087 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a / b),
3088 _ => upon::Value::None,
3089 }
3090}
3091
3092
3093fn map_value(value: &toml::Value) -> upon::Value {
3095 match value {
3096 toml::Value::String(s) => upon::Value::String(s.clone()),
3097 toml::Value::Integer(i) => upon::Value::Integer(*i),
3098 toml::Value::Float(i) => upon::Value::Float(*i),
3099 toml::Value::Boolean(v) => upon::Value::Bool(*v),
3100 toml::Value::Datetime(s) => upon::Value::String(s.to_string()),
3101 toml::Value::Array(v) => upon::Value::List(v.iter().map(|i| map_value(i)).collect()),
3102 toml::Value::Table(v) => upon::Value::Map(v.iter().map(|(k,i)| (k.clone(),map_value(i))).collect()),
3103 }
3104}
3105
3106#[inline]
3108fn string_to_datavalue(value: String, valuetype: Option<&str>) -> Result<DataValue,XmlConversionError> {
3109 match valuetype {
3110 Some("str") | Some("string") => Ok(DataValue::String(value)),
3111 Some("int") => {
3112 if let Ok(value) = value.parse::<isize>() {
3113 Ok(DataValue::Int(value))
3114 } else {
3115 Err(XmlConversionError::TemplateError(format!("Unable to interpret value as integer: {}", value), None))
3116 }
3117 },
3118 Some("float") => {
3119 if let Ok(value) = value.parse::<f64>() {
3120 Ok(DataValue::Float(value))
3121 } else {
3122 Err(XmlConversionError::TemplateError(format!("Unable to interpret value as integer: {}", value), None))
3123 }
3124 },
3125 Some("bool") => match value.as_str() {
3126 "yes" | "true" | "enabled" | "on" | "1" | "active" => Ok(DataValue::Bool(true)),
3127 _ => Ok(DataValue::Bool(false))
3128 },
3129 Some(x) => {
3130 Err(XmlConversionError::TemplateError(format!("Invalid valuetype: {}", x), None))
3131 }
3132 None => {
3133 if let Ok(value) = value.parse::<isize>() {
3135 Ok(DataValue::Int(value))
3136 } else if let Ok(value) = value.parse::<f64>() {
3137 Ok(DataValue::Float(value))
3138 } else if value.starts_with("(list) [ ") && value.ends_with(" ]") {
3139 if let Ok(serde_json::Value::Array(values)) = serde_json::from_str(&value[6..]) {
3141 Ok(DataValue::List(values.into_iter().map(|v| {
3142 match v {
3143 serde_json::Value::String(s) => DataValue::String(s),
3144 serde_json::Value::Number(n) => if let Some(n) = n.as_i64() {
3145 DataValue::Int(n as isize)
3146 } else if let Some(n) = n.as_f64() {
3147 DataValue::Float(n)
3148 } else {
3149 unreachable!("number should always be either int or float")
3150 },
3151 serde_json::Value::Bool(b) => DataValue::Bool(b),
3152 _ => DataValue::Null, }
3154 }).collect()))
3155 } else {
3156 Err(XmlConversionError::TemplateError(format!("Unable to deserialize list value: {}", value), None))
3157 }
3158 } else {
3159 Ok(value.into())
3160 }
3161 }
3162 }
3163}
3164
3165fn string_to_templatevalue(value: String) -> upon::Value {
3166 if let Ok(value) = value.parse::<i64>() {
3167 upon::Value::Integer(value)
3168 } else if let Ok(value) = value.parse::<f64>() {
3169 upon::Value::Float(value)
3170 } else {
3171 upon::Value::String(value)
3172 }
3173}
3174
3175fn value_formatter(f: &mut upon::fmt::Formatter<'_>, value: &upon::Value) -> upon::fmt::Result {
3178 match value {
3179 upon::Value::List(vs) => {
3180 f.write_str("(list) [ ")?;
3181 for (i, v) in vs.iter().enumerate() {
3182 if i > 0 {
3183 f.write_str(", ")?;
3184 }
3185 if let upon::Value::String(s) = v {
3186 write!(f, "\"{}\"", s.replace("\"","\\\"").replace("\n"," ").split_whitespace().collect::<Vec<_>>().join(" "))?;
3187 } else {
3188 upon::fmt::default(f, v)?;
3189 f.write_char('"')?;
3190 }
3191 }
3192 f.write_str(" ]")?;
3193 }
3194 v => upon::fmt::default(f, v)?, };
3196 Ok(())
3197}
3198
3199#[derive(Clone,Debug,Deserialize)]
3200struct ExternalFilter {
3201 name: String,
3203
3204 command: String,
3206
3207 args: Vec<String>
3209}
3210
3211impl ExternalFilter {
3212 fn run(&self, input_value: &upon::Value) -> upon::Value {
3214 let process = Command::new(self.command.as_str()).args(
3215 self.args.iter().map(|x| if x == "{{value}}" || x == "{{ value }}" || x == "$value" {
3217 match input_value {
3218 upon::Value::String(s) => s.clone(),
3219 upon::Value::Integer(d) => format!("{}",d),
3220 upon::Value::Float(d) => format!("{}",d),
3221 upon::Value::Bool(d) => format!("{}",d),
3222 upon::Value::None => String::new(),
3223 _ => panic!("Lists and maps are not supported to be passed as parameter to external filters yet!"),
3224 }
3225 } else {
3226 x.clone() })
3228 ).stdin(Stdio::piped()).stdout(Stdio::piped()).spawn();
3229
3230
3231 if let Ok(mut process) = process {
3232 {
3233 let mut outstdin = process.stdin.take().expect("unable to open stdin for external filter");
3234 let mut writer = BufWriter::new(&mut outstdin);
3235 match input_value {
3236 upon::Value::String(s) => writer.write(s.as_bytes()),
3237 upon::Value::Integer(d) => writer.write(format!("{}",d).as_bytes()),
3238 upon::Value::Float(d) => writer.write(format!("{}",d).as_bytes()),
3239 upon::Value::Bool(d) => writer.write(format!("{}",d).as_bytes()),
3240 upon::Value::None => writer.write(&[]),
3241 _ => panic!("Lists and maps are not supported to be passed as input to external filters yet!"),
3242 }.expect("Writing to stdin for external filter failed!");
3243 }
3245 let output = process.wait_with_output().expect("External filter wasn't running");
3246 if !output.status.success() {
3247 panic!("External filter {} failed ({:?})", self.name, output.status.code());
3248 }
3249 if let Ok(s) = String::from_utf8(output.stdout) {
3250 return string_to_templatevalue(s);
3251 } else {
3252 panic!("External filter {} produced invalid UTF-8!", self.name);
3253 }
3254 }
3255 panic!("External filter {} failed!", self.name);
3256 }
3257}
3258
3259#[cfg(test)]
3260mod tests {
3261 use super::*;
3262 const XMLSMALLEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3265<head><title>test</title></head><body><h1>TEST</h1><p xml:id="p1" n="001">This is a <em xml:id="emphasis" style="color:green">test</em>.</p></body></html>"#;
3266
3267 const XMLEXAMPLE: &'static str = r#"<!DOCTYPE entities[<!ENTITY nbsp " ">]>
3268<html xmlns="http://www.w3.org/1999/xhtml" xmlns:my="http://example.com">
3269<head>
3270 <title>Test</title>
3271 <meta name="author" content="proycon" />
3272</head>
3273<body>
3274 <h1>Header</h1>
3275
3276 <p xml:id="par1">
3277 <span xml:id="sen1">This is a sentence.</span>
3278 <span xml:id="sen2">This is the second sentence.</span>
3279 </p>
3280 <p xml:id="par2">
3281 <strong>This</strong> is the <em>second</em> paragraph.
3282 It has a <strong>bold</strong> word and one in <em>italics</em>.<br/>
3283 Let's highlight stress in the following word: <span my:stress="secondary">re</span>pu<span my:stress="primary">ta</span>tion.
3284 </p>
3285 <p xml:space="preserve"><![CDATA[This third
3286paragraph consists
3287of CDATA and is configured to preserve whitespace, and weird &entities; ]]></p>
3288
3289 <h2>Subsection</h2>
3290
3291 <p>
3292 Have some fruits:<br/>
3293 <ul xml:id="list1" class="fruits">
3294 <li xml:id="fruit1">apple</li>
3295 <li xml:id="fruit2">banana</li>
3296 <li xml:id="fruit3">melon</li>
3297 </ul>
3298 </p>
3299
3300 Some lingering text outside of any confines...
3301</body>
3302</html>"#;
3303
3304 const XMLEXAMPLE_TEXTOUTPUT: &'static str = "Header\n\nThis is a sentence. This is the second sentence.\n\nThis is the second paragraph. It has a bold word and one in italics.\nLet's highlight stress in the following word: reputation.\n\nThis third\nparagraph consists\nof CDATA and is configured to preserve whitespace, and weird &entities; \nSubsection\n\nHave some fruits:\n* apple\n* banana\n* melon\n\nSome lingering text outside of any confines...";
3305
3306 const XMLTEISPACE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3308<body><space dim="vertical" unit="lines" quantity="3" /></body></html>"#;
3309
3310 const CONF: &'static str = r#"#default whitespace handling (Collapse or Preserve)
3311whitespace = "Collapse"
3312default_set = "urn:stam-fromhtml"
3313
3314[namespaces]
3315#this defines the namespace prefixes you can use in this configuration
3316xml = "http://www.w3.org/XML/1998/namespace"
3317html = "http://www.w3.org/1999/xhtml"
3318xsd = "http://www.w3.org/2001/XMLSchema"
3319xlink = "http://www.w3.org/1999/xlink"
3320
3321# elements and attributes are matched in reverse-order, so put more generic statements before more specific ones
3322
3323#Define some base elements that we reuse later for actual elements (prevents unnecessary repetition)
3324[baseelements.common]
3325id = "{% if ?.@xml:id %}{{ @xml:id }}{% endif %}"
3326
3327 [[baseelements.common.annotationdata]]
3328 key = "type"
3329 value = "{{ localname }}"
3330
3331 [[baseelements.common.annotationdata]]
3332 key = "lang"
3333 value = "{{ @xml:lang }}"
3334 skip_if_missing = true
3335
3336 [[baseelements.common.annotationdata]]
3337 key = "n"
3338 value = "{{ @n }}"
3339 skip_if_missing = true
3340 valuetype = "int"
3341
3342 [[baseelements.common.annotationdata]]
3343 key = "nstring"
3344 value = "{{ @n }}"
3345 skip_if_missing = true
3346 valuetype = "string"
3347
3348 [[baseelements.common.annotationdata]]
3349 key = "style"
3350 value = "{{ @style }}"
3351 skip_if_missing = true
3352
3353 [[baseelements.common.annotationdata]]
3354 key = "class"
3355 value = "{{ @class }}"
3356 skip_if_missing = true
3357
3358 [[baseelements.common.annotationdata]]
3359 key = "src"
3360 value = "{{ @src }}"
3361 skip_if_missing = true
3362
3363[baseelements.text]
3364text = true
3365
3366
3367[[elements]]
3368base = [ "text", "common" ]
3369path = "*"
3370text = true
3371annotation = "TextSelector"
3372
3373# Pass through the following elements without mapping to text
3374[[elements]]
3375base = [ "common" ]
3376path = "//html:head"
3377
3378[[elements]]
3379base = [ "common" ]
3380path = "//html:head//*"
3381
3382# Map metadata like <meta name="key" content="value"> to annotations with key->value data selecting the resource (ResourceSelector)
3383[[elements]]
3384base = [ "common" ]
3385path = "//html:head//html:meta"
3386
3387[[elements.annotationdata]]
3388key = "{% if ?.@name %}{{ name }}{% endif %}"
3389value = "{% if ?.@content %}{{ @content }}{% endif %}"
3390skip_if_missing = true
3391
3392# By default, ignore any tags in the head (unless they're mentioned specifically later in the config)
3393[[elements]]
3394path = "//html:head/html:title"
3395annotation = "ResourceSelector"
3396
3397[[elements.annotationdata]]
3398key = "title"
3399value = "{{ $. | trim }}"
3400
3401
3402# Determine how various structural elements are converted to text
3403
3404[[elements]]
3405base = [ "common" ]
3406path = "//html:br"
3407textsuffix = "\n"
3408
3409[[elements]]
3410base = [ "common", "text" ]
3411path = "//html:p"
3412textprefix = "\n"
3413textsuffix = "\n"
3414annotation = "TextSelector"
3415
3416# Let's do headers and bulleted lists like markdown
3417[[elements]]
3418base = [ "common", "text" ]
3419path = "//html:h1"
3420textsuffix = "\n"
3421
3422[[elements]]
3423base = [ "common", "text" ]
3424path = "//html:h2"
3425textsuffix = "\n"
3426
#Generic, will be overridden by more specific one
3428[[elements]]
3429base = [ "common", "text" ]
3430path = "//html:li"
3431textprefix = "- "
3432textsuffix = "\n"
3433
3434[[elements]]
3435base = [ "common", "text" ]
3436path = """//html:body"""
3437annotation = "TextSelector"
3438id = "body"
3439
3440 [[elements.annotationdata]]
3441 key = "title_from_parent"
3442 value = "{{ $../html:head/html:title }}"
3443 skip_if_missing = true
3444
3445 [[elements.annotationdata]]
3446 key = "title_from_root"
3447 value = "{{ $/html:html/html:head/html:title }}"
3448 skip_if_missing = true
3449
3450 [[elements.annotationdata]]
3451 key = "firstfruit"
3452 value = """{{ $./html:p/html:ul/html:li }}"""
3453 skip_if_missing = true
3454
3455 [[elements.annotationdata]]
3456 key = "fruits"
3457 value = """{{ $$./html:p/html:ul/html:li }}"""
3458 skip_if_missing = true
3459
3460 [[elements.annotationdata]]
3461 key = "multifruits"
3462 value = """{{ $$./html:p/html:ul/html:li }}"""
3463 skip_if_missing = true
3464 multiple = true
3465
#More specific one takes precedence over the above generic one
3467[[elements]]
3468base = [ "common", "text" ]
3469path = """//html:ul[@class="fruits"]/html:li"""
3470textprefix = "* "
3471textsuffix = "\n"
3472
3473#Not real HTML, test-case modelled after TEI space
3474[[elements]]
3475base = [ "common" ]
3476path = """//html:space[@dim="vertical" and @unit="lines"]"""
3477text = true
3478textsuffix = """\n{% for x in @quantity | int | as_range %}\n{% endfor %}"""
3479
3480
3481[[elements]]
3482base = [ "common", "text" ]
3483path = "//html:example"
3484annotation = "TextSelector"
3485
3486[[elements.annotationdata]]
3487key = "requiredattrib"
3488value = "{{ @requiredattrib }}"
3489
3490[[elements.annotationdata]]
3491key = "optattrib"
3492value = "{{ ?.@optattrib }}"
3493
3494[[elements]]
3495base = [ "common","text" ]
3496path = "//html:marquee"
3497annotation = "TextSelector"
3498
3499#map value, some bogus data to test parsing
3500[[elements.annotationdata]]
3501key = "map"
3502
3503[elements.annotationdata.value]
3504text = "{{ $. }}"
3505number = 42
3506bogus = true
3507
3508[[metadata]]
3509id = "metadata"
3510
3511[[metadata.annotationdata]]
3512key = "author"
3513value = "proycon"
3514"#;
3515
3516 const XMLREQATTRIBEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3517<body><example xml:id="ann1" requiredattrib="blah">test</example></body></html>"#;
3518
3519 const XMLREQATTRIBEXAMPLE2: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3520<body><example xml:id="ann1">test</example></body></html>"#;
3521
3522 const XMLREQATTRIBEXAMPLE3: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3523<body><example xml:id="ann1" requiredattrib="blah" optattrib="blah">test</example></body></html>"#;
3524
3525 const XMLMAPEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3526<body><marquee xml:id="ann1">test</marquee></body></html>"#;
3527
3528 #[test]
3529 fn test_precompile_template_nochange() -> Result<(), String> {
3530 let config = XmlConversionConfig::new();
3531 let mut conv = XmlToStamConverter::new(&config);
3532 let template_in = "{{ foo }}";
3533 let template_out = conv.precompile(template_in);
3534 assert_eq!( template_out, template_in);
3535 assert!(!conv.variables.get(template_in).as_ref().unwrap().contains("foo"));
3537 Ok(())
3538 }
3539
3540 #[test]
3541 fn test_precompile_template_attrib() -> Result<(), String> {
3542 let config = XmlConversionConfig::new();
3543 let mut conv = XmlToStamConverter::new(&config);
3544 let template_in = "{{ @foo }}";
3545 let template_out = conv.precompile(template_in);
3546 assert_eq!(template_out, "{{ ATTRIB_foo }}");
3547 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@foo"));
3549 Ok(())
3550 }
3551
3552 #[test]
3553 fn test_precompile_template_attrib_ns() -> Result<(), String> {
3554 let config = XmlConversionConfig::new();
3555 let mut conv = XmlToStamConverter::new(&config);
3556 let template_in = "{{ @bar:foo }}";
3557 let template_out = conv.precompile(template_in);
3558 assert_eq!(template_out, "{{ ATTRIB_bar__foo }}");
3559 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@bar:foo"));
3561 Ok(())
3562 }
3563
3564 #[test]
3565 fn test_precompile_template_element() -> Result<(), String> {
3566 let config = XmlConversionConfig::new();
3567 let mut conv = XmlToStamConverter::new(&config);
3568 let template_in = "{{ $foo }}";
3569 let template_out = conv.precompile(template_in);
3570 assert_eq!(template_out, "{{ ELEMENT_foo }}");
3571 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$foo"));
3573 Ok(())
3574 }
3575
3576 #[test]
3577 fn test_precompile_template_element_ns() -> Result<(), String> {
3578 let config = XmlConversionConfig::new();
3579 let mut conv = XmlToStamConverter::new(&config);
3580 let template_in = "{{ $bar:foo }}";
3581 let template_out = conv.precompile(template_in);
3582 assert_eq!(template_out, "{{ ELEMENT_bar__foo }}");
3583 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$bar:foo"));
3585 Ok(())
3586 }
3587
3588 #[test]
3589 fn test_precompile_template_this_text() -> Result<(), String> {
3590 let config = XmlConversionConfig::new();
3591 let mut conv = XmlToStamConverter::new(&config);
3592 let template_in = "{{ $. }}";
3593 let template_out = conv.precompile(template_in);
3594 assert_eq!(template_out, "{{ ELEMENT_THIS }}");
3595 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$."));
3596 Ok(())
3597 }
3598
3599 #[test]
3600 fn test_precompile_template_parent_text() -> Result<(), String> {
3601 let config = XmlConversionConfig::new();
3602 let mut conv = XmlToStamConverter::new(&config);
3603 let template_in = "{{ $.. }}";
3604 let template_out = conv.precompile(template_in);
3605 assert_eq!(template_out, "{{ ELEMENT_PARENT }}");
3606 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$.."));
3607 Ok(())
3608 }
3609
3610 #[test]
3611 fn test_precompile_template_elements() -> Result<(), String> {
3612 let config = XmlConversionConfig::new();
3613 let mut conv = XmlToStamConverter::new(&config);
3614 let template_in = "{{ $$foo }}";
3615 let template_out = conv.precompile(template_in);
3616 assert_eq!(template_out, "{{ ELEMENTS_foo }}");
3617 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$$foo"));
3618 Ok(())
3619 }
3620
3621 #[test]
3622 fn test_precompile_template_elements_ns() -> Result<(), String> {
3623 let config = XmlConversionConfig::new();
3624 let mut conv = XmlToStamConverter::new(&config);
3625 let template_in = "{{ $$bar:foo }}";
3626 let template_out = conv.precompile(template_in);
3627 assert_eq!(template_out, "{{ ELEMENTS_bar__foo }}");
3628 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$$bar:foo"));
3629 Ok(())
3630 }
3631
3632
3633 #[test]
3634 fn test_precompile_template_attrib2() -> Result<(), String> {
3635 let config = XmlConversionConfig::new();
3636 let mut conv = XmlToStamConverter::new(&config);
3637 let template_in = "{% for x in @foo %}";
3638 let template_out = conv.precompile(template_in);
3639 assert_eq!(template_out, "{% for x in ATTRIB_foo %}");
3640 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@foo"));
3642 Ok(())
3643 }
3644
3645 #[test]
3646 fn test_precompile_template_attrib3() -> Result<(), String> {
3647 let config = XmlConversionConfig::new();
3648 let mut conv = XmlToStamConverter::new(&config);
3649 let template_in = "{{ ?.@foo }}";
3650 let template_out = conv.precompile(template_in);
3651 assert_eq!(template_out, "{{ ?.ATTRIB_foo }}");
3652 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@foo"));
3653 Ok(())
3654 }
3655
3656 #[test]
3657 fn test_precompile_template_path() -> Result<(), String> {
3658 let config = XmlConversionConfig::new();
3659 let mut conv = XmlToStamConverter::new(&config);
3660 let template_in = "{{ $x/y/z/@a }}";
3661 let template_out = conv.precompile(template_in);
3662 assert_eq!(template_out, "{{ ELEMENT_x_IN_y_IN_z_IN_ATTRIB_a }}");
3663 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$x/y/z/@a"));
3664 Ok(())
3665 }
3666
3667 #[test]
3668 fn test_loadconfig() -> Result<(), String> {
3669 let config = XmlConversionConfig::from_toml_str(CONF)?;
3670 let mut conv = XmlToStamConverter::new(&config);
3671 conv.compile().map_err(|e| format!("{}",e))?;
3672 assert_eq!(conv.config.namespaces.len(),4 , "number of namespaces");
3673 assert_eq!(conv.config.elements.len(), 15, "number of elements");
3674 assert_eq!(conv.config.baseelements.len(), 2, "number of baseelements");
3675 assert_eq!(conv.config.elements.get(0).unwrap().annotationdata.len(), 7,"number of annotationdata under first element");
3676 assert_eq!(conv.config.baseelements.get("common").unwrap().annotationdata.len(), 7,"number of annotationdata under baseelement common");
3677 Ok(())
3678 }
3679
3680 #[test]
3681 fn test_small() -> Result<(), String> {
3682 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3683 let mut store = stam::AnnotationStore::new(stam::Config::new());
3684 from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;
3685 let res = store.resource("test").expect("resource must have been created at this point");
3686 assert_eq!(res.text(), "TEST\n\nThis is a test.\n", "resource text");
3687 assert_eq!(store.annotations_len(), 6, "number of annotations");
3688 let annotation = store.annotation("emphasis").expect("annotation must have been created at this point");
3689 assert_eq!(annotation.text_simple(), Some("test"));
3690 let key = store.key("urn:stam-fromhtml", "style").expect("key must exist");
3692 assert_eq!(annotation.data().filter_key(&key).value_as_str(), Some("color:green"));
3693 let key = store.key("urn:stam-fromhtml", "title").expect("key must exist");
3694 let annotation = res.annotations_as_metadata().filter_key(&key).next().expect("annotation");
3695 assert_eq!(annotation.data().filter_key(&key).value_as_str(), Some("test"));
3696 let bodyannotation = store.annotation("body").expect("body annotation not found");
3697 let title1 = store.key("urn:stam-fromhtml", "title_from_parent").expect("key must exist");
3698 let title2 = store.key("urn:stam-fromhtml", "title_from_root").expect("key must exist");
3699 assert_eq!(bodyannotation.data().filter_key(&title1).value_as_str(), Some("test"));
3700 assert_eq!(bodyannotation.data().filter_key(&title2).value_as_str(), Some("test"));
3701 Ok(())
3702 }
3703
3704 #[test]
3705 fn test_full() -> Result<(), String> {
3706 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3707 let mut store = stam::AnnotationStore::new(stam::Config::new());
3708 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3709 let res = store.resource("test").expect("resource must have been created at this point");
3710 assert_eq!(res.text(), XMLEXAMPLE_TEXTOUTPUT, "resource text");
3711 Ok(())
3712 }
3713
3714 #[test]
3715 fn test_firstfruit() -> Result<(), String> {
3716 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3717 let mut store = stam::AnnotationStore::new(stam::Config::new());
3718 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3719 let bodyannotation = store.annotation("body").expect("body annotation not found");
3720 let fruit = store.key("urn:stam-fromhtml", "firstfruit").expect("key must exist");
3721 assert_eq!(bodyannotation.data().filter_key(&fruit).value_as_str(), Some("apple") );
3722 Ok(())
3723 }
3724
3725 #[test]
3726 fn test_fruits() -> Result<(), String> {
3727 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3728 let mut store = stam::AnnotationStore::new(stam::Config::new());
3729 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3730 let bodyannotation = store.annotation("body").expect("body annotation not found");
3731 let fruits = store.key("urn:stam-fromhtml", "fruits").expect("key must exist");
3732 assert_eq!(bodyannotation.data().filter_key(&fruits).value(), Some(&DataValue::List(vec!("apple".into(),"banana".into(),"melon".into()) )));
3733 Ok(())
3734 }
3735
3736 #[test]
3737 fn test_multifruits() -> Result<(), String> {
3738 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3739 let mut store = stam::AnnotationStore::new(stam::Config::new());
3740 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3741 let bodyannotation = store.annotation("body").expect("body annotation not found");
3742 let fruits = store.key("urn:stam-fromhtml", "multifruits").expect("key must exist");
3743 let results: Vec<_> = bodyannotation.data().filter_key(&fruits).collect();
3744 assert_eq!(results.len(), 3);
3745 assert_eq!(results.get(0).unwrap().value(),&DataValue::String("apple".to_string()) );
3746 assert_eq!(results.get(1).unwrap().value(),&DataValue::String("banana".to_string()) );
3747 assert_eq!(results.get(2).unwrap().value(),&DataValue::String("melon".to_string()) );
3748 Ok(())
3749 }
3750
3751 #[test]
3752 fn test_teispace() -> Result<(), String> {
3753 let config = XmlConversionConfig::from_toml_str(CONF)?;
3754 let mut store = stam::AnnotationStore::new(stam::Config::new());
3755 from_xml_in_memory("test", XMLTEISPACE, &config, &mut store)?;
3756 let res = store.resource("test").expect("resource must have been created at this point");
3757 assert_eq!(res.text(), "\n\n\n\n", "resource text");
3758 Ok(())
3759 }
3760
3761
3762 #[test]
3763 fn test_reqattrib() -> Result<(), String> {
3764 let config = XmlConversionConfig::from_toml_str(CONF)?;
3765 let mut store = stam::AnnotationStore::new(stam::Config::new());
3766 from_xml_in_memory("test", XMLREQATTRIBEXAMPLE, &config, &mut store)?;
3767 let res = store.resource("test").expect("resource must have been created at this point");
3768 assert_eq!(res.text(), "test", "resource text");
3769 let key = store.key("urn:stam-fromhtml", "requiredattrib").expect("key must exist");
3770 let annotation = store.annotation("ann1").expect("annotation");
3771 assert_eq!(annotation.data().filter_key(&key).value_as_str(), Some("blah"));
3772 assert!(store.key("urn:stam-fromhtml", "optattrib").is_none(), "optional attrib is unused");
3773 Ok(())
3774 }
3775
3776 #[test]
3777 fn test_reqattrib2() -> Result<(), String> {
3778 let mut config = XmlConversionConfig::from_toml_str(CONF)?;
3779 config = config.with_debug(true);
3780 let mut store = stam::AnnotationStore::new(stam::Config::new());
3781 assert!(from_xml_in_memory("test", XMLREQATTRIBEXAMPLE2, &config, &mut store).is_err(), "checking if error is returned");
3782 Ok(())
3783 }
3784
3785 #[test]
3786 fn test_reqattrib3() -> Result<(), String> {
3787 let config = XmlConversionConfig::from_toml_str(CONF)?;
3788 let mut store = stam::AnnotationStore::new(stam::Config::new());
3789 from_xml_in_memory("test", XMLREQATTRIBEXAMPLE3, &config, &mut store)?;
3790 let res = store.resource("test").expect("resource must have been created at this point");
3791 assert_eq!(res.text(), "test", "resource text");
3792 let reqkey = store.key("urn:stam-fromhtml", "requiredattrib").expect("key must exist");
3793 let optkey = store.key("urn:stam-fromhtml", "optattrib").expect("key optattrib must exist");
3794 let annotation = store.annotation("ann1").expect("annotation");
3795 assert_eq!(annotation.data().filter_key(&reqkey).value_as_str(), Some("blah"));
3796 assert_eq!(annotation.data().filter_key(&optkey).value_as_str(), Some("blah"));
3797 Ok(())
3798 }
3799
3800 #[test]
3801 fn test_map() -> Result<(), String> {
3802 let config = XmlConversionConfig::from_toml_str(CONF)?;
3803 let mut store = stam::AnnotationStore::new(stam::Config::new());
3804 from_xml_in_memory("test", XMLMAPEXAMPLE, &config, &mut store)?;
3805 let res = store.resource("test").expect("resource must have been created at this point");
3806 assert_eq!(res.text(), "test", "resource text");
3807 let key = store.key("urn:stam-fromhtml", "map").expect("key must exist");
3808 let annotation = store.annotation("ann1").expect("annotation");
3809 let data = annotation.data().filter_key(&key).value().expect("data must exist");
3810 if let DataValue::Map(data) = data {
3811 assert_eq!(data.get("text"), Some(&DataValue::String("test".into())));
3812 assert_eq!(data.get("number"), Some(&DataValue::Int(42)));
3813 assert_eq!(data.get("bogus"), Some(&DataValue::Bool(true)));
3814 assert_eq!(data.len(), 3);
3815 } else {
3816 assert!(false, "Data is supposed to be a map");
3817 }
3818 Ok(())
3819 }
3820
3821 #[test]
3822 fn test_metadata() -> Result<(), String> {
3823 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3824 let mut store = stam::AnnotationStore::new(stam::Config::new());
3825 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3826 let annotation = store.annotation("metadata").expect("annotation");
3827 let key = store.key("urn:stam-fromhtml", "author").expect("key must exist");
3828 let data = annotation.data().filter_key(&key).value().expect("data must exist");
3829 assert_eq!(data, &DataValue::String("proycon".into()));
3830 Ok(())
3831 }
3832
3833 #[test]
3834 fn test_datavalue_int() -> Result<(), String> {
3835 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3836 let mut store = stam::AnnotationStore::new(stam::Config::new());
3837 from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;
3838 let annotation = store.annotation("p1").expect("annotation not found");
3839 let key = store.key("urn:stam-fromhtml", "n").expect("key must exist");
3840 assert_eq!(annotation.data().filter_key(&key).value(), Some(&DataValue::Int(1)));
3841 Ok(())
3842 }
3843
3844 #[test]
3845 fn test_datavalue_string() -> Result<(), String> {
3846 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3847 let mut store = stam::AnnotationStore::new(stam::Config::new());
3848 from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;
3849 let annotation = store.annotation("p1").expect("annotation not found");
3850 let key = store.key("urn:stam-fromhtml", "nstring").expect("key must exist");
3851 assert_eq!(annotation.data().filter_key(&key).value(), Some(&DataValue::String("001".to_string())));
3852 Ok(())
3853 }
3854
3855}