1use std::borrow::Cow;
2use std::collections::{BTreeMap, HashMap, BTreeSet};
3use std::fmt::Display;
4use std::fs::read_to_string;
5use std::path::Path;
6use std::hash::{Hash,DefaultHasher,Hasher};
7use std::process::{Command, Stdio};
8use std::io::{ BufWriter, Write};
9
10use roxmltree::{Document, Node, NodeId, ParsingOptions};
11use serde::Deserialize;
12use stam::*;
13use toml;
14use upon::Engine;
15use std::fmt::Write as FmtWrite;
16use serde_json;
17
18const NS_XML: &str = "http://www.w3.org/XML/1998/namespace";
19const CONTEXT_ANNO: &str = "http://www.w3.org/ns/anno.jsonld";
20
21
/// Serde default for `XmlConversionConfig::default_set`: the set identifier
/// `urn:stam-fromxml`.
fn default_set() -> String {
    String::from("urn:stam-fromxml")
}
25
26#[derive(Deserialize)]
27pub struct XmlConversionConfig {
29 #[serde(default)]
30 elements: Vec<XmlElementConfig>,
32
33 #[serde(default)]
34 baseelements: HashMap<String, XmlElementConfig>,
36
37 #[serde(default)]
38 namespaces: HashMap<String, String>,
40
41 #[serde(default = "XmlWhitespaceHandling::collapse")]
42 whitespace: XmlWhitespaceHandling,
44
45 #[serde(default)]
46 context: HashMap<String, toml::Value>,
48
49 #[serde(default)]
50 metadata: Vec<MetadataConfig>,
52
53 #[serde(default)]
54 inject_dtd: Option<String>,
56
57 #[serde(default = "default_set")]
58 default_set: String,
59
60 #[serde(default)]
61 id_prefix: Option<String>,
63
64 #[serde(default)]
65 id_strip_suffix: Vec<String>,
67
68 #[serde(default)]
69 provenance: bool,
71
72 #[serde(default)]
73 external_filters: Vec<ExternalFilter>,
74
75 #[serde(skip_deserializing)]
76 debug: bool,
77
78}
79
80impl XmlConversionConfig {
81 pub fn new() -> Self {
82 Self {
83 elements: Vec::new(),
84 baseelements: HashMap::new(),
85 namespaces: HashMap::new(),
86 context: HashMap::new(),
87 metadata: Vec::new(),
88 whitespace: XmlWhitespaceHandling::Collapse,
89 default_set: default_set(),
90 inject_dtd: None,
91 id_prefix: None,
92 id_strip_suffix: Vec::new(),
93 provenance: false,
94 external_filters: Vec::new(),
95 debug: false,
96 }
97 }
98
99 pub fn resolve_baseelements(&mut self) -> Result<(), XmlConversionError> {
100 let mut replace: Vec<(usize, XmlElementConfig)> = Vec::new();
101 for (i, element) in self.elements.iter().enumerate() {
102 let mut newelement = None;
103 for basename in element.base.iter().rev() {
104 if let Some(baseelement) = self.baseelements.get(basename) {
105 if newelement.is_none() {
106 newelement = Some(element.clone());
107 }
108 newelement
109 .as_mut()
110 .map(|newelement| newelement.update(baseelement));
111 } else {
112 return Err(XmlConversionError::ConfigError(format!(
113 "No such base element: {}",
114 basename
115 )));
116 }
117 }
118 if let Some(newelement) = newelement {
119 replace.push((i, newelement));
120 }
121 }
122 for (i, element) in replace {
123 self.elements[i] = element;
124 }
125 Ok(())
126 }
127
128 pub fn from_toml_str(tomlstr: &str) -> Result<Self, String> {
130 let mut config: Self = toml::from_str(tomlstr).map_err(|e| format!("{}", e))?;
131 config.resolve_baseelements().map_err(|e| format!("{}", e))?;
132 Ok(config)
133 }
134
135 pub fn with_debug(mut self, value: bool) -> Self {
136 self.debug = value;
137 self
138 }
139
140 pub fn with_provenance(mut self, value: bool) -> Self {
142 self.provenance = value;
143 self
144 }
145
146 pub fn with_prefix(mut self, prefix: impl Into<String>, namespace: impl Into<String>) -> Self {
148 self.namespaces.insert(prefix.into(), namespace.into());
149 self
150 }
151
152 pub fn with_id_prefix(mut self, prefix: impl Into<String>) -> Self {
154 self.id_prefix = Some(prefix.into());
155 self
156 }
157
158 pub fn with_id_strip_suffix(mut self, suffix: impl Into<String>) -> Self {
160 self.id_strip_suffix.push(suffix.into());
161 self
162 }
163
164 pub fn with_inject_dtd(mut self, dtd: impl Into<String>) -> Self {
166 self.inject_dtd = Some(dtd.into());
167 self
168 }
169
170 pub fn with_whitespace(mut self, handling: XmlWhitespaceHandling) -> Self {
172 self.whitespace = handling;
173 self
174 }
175
176 pub fn with_element<F>(mut self, expression: &str, setup: F) -> Self
178 where
179 F: Fn(XmlElementConfig) -> XmlElementConfig,
180 {
181 let expression = XPathExpression::new(expression);
182 let element = setup(XmlElementConfig::new(expression));
183 if self.debug {
184 eprintln!("[STAM fromxml] registered {:?}", element);
185 }
186 self.elements.push(element);
187 self
188 }
189
190 fn element_config(&self, node: Node, path: &NodePath) -> Option<&XmlElementConfig> {
192 for elementconfig in self.elements.iter().rev() {
193 if elementconfig.path.test(path, node, self) {
194 return Some(elementconfig);
195 }
196 }
197 None
198 }
199
200 pub fn add_context(&mut self, key: impl Into<String>, value: toml::Value) {
201 self.context.insert(key.into(), value);
202 }
203
204 pub fn debug(&self) -> bool {
205 self.debug
206 }
207}
208
209#[derive(Clone, Copy, Debug, PartialEq, Deserialize)]
210pub enum XmlWhitespaceHandling {
212 Unspecified,
214 Inherit,
216 Preserve,
218 Collapse,
220}
221
222impl Default for XmlWhitespaceHandling {
223 fn default() -> Self {
224 XmlWhitespaceHandling::Unspecified
225 }
226}
227
228impl XmlWhitespaceHandling {
229 fn collapse() -> Self {
230 XmlWhitespaceHandling::Collapse
231 }
232}
233
234#[derive(Debug, Clone, Deserialize, PartialEq, Copy, Default)]
235pub enum XmlAnnotationHandling {
236 #[default]
238 Unspecified,
239
240 None,
242
243 TextSelector,
245
246 ResourceSelector,
248
249 TextSelectorBetweenMarkers,
251}
252
253#[derive(Debug, Clone, Deserialize)]
254pub struct XmlElementConfig {
256 #[serde(default)]
259 path: XPathExpression,
260
261 #[serde(default)]
262 annotation: XmlAnnotationHandling,
263
264 #[serde(default)]
265 annotationdata: Vec<XmlAnnotationDataConfig>,
266
267 #[serde(default)]
269 textprefix: Option<String>,
270
271 #[serde(default)]
273 text: Option<bool>,
274
275 #[serde(default)]
277 textsuffix: Option<String>,
278
279 #[serde(default)]
281 annotatetextprefix: Vec<XmlAnnotationDataConfig>,
282
283 #[serde(default)]
285 annotatetextsuffix: Vec<XmlAnnotationDataConfig>,
286
287 #[serde(default)]
289 include_textprefix: Option<bool>,
290
291 #[serde(default)]
293 include_textsuffix: Option<bool>,
294
295 #[serde(default)]
297 base: Vec<String>,
298
299 #[serde(default)]
301 id: Option<String>,
302
303 #[serde(default)]
304 stop: Option<bool>,
306
307 #[serde(default)]
308 whitespace: XmlWhitespaceHandling,
310
311 #[serde(default)]
312 scope_id: Option<String>,
314
315 #[serde(default)]
316 marker_scope: Option<String>,
319
320}
321
322impl XmlElementConfig {
323 fn new(expression: XPathExpression) -> Self {
324 Self {
325 path: expression,
326 stop: None,
327 whitespace: XmlWhitespaceHandling::Unspecified,
328 annotation: XmlAnnotationHandling::Unspecified,
329 annotationdata: Vec::new(),
330 base: Vec::new(),
331 id: None,
332 textprefix: None,
333 text: None,
334 textsuffix: None,
335 annotatetextprefix: Vec::new(),
336 annotatetextsuffix: Vec::new(),
337 include_textprefix: None,
338 include_textsuffix: None,
339 scope_id: None,
340 marker_scope: None,
341 }
342 }
343
344 pub fn update(&mut self, base: &XmlElementConfig) {
345 if self.whitespace == XmlWhitespaceHandling::Unspecified
346 && base.whitespace != XmlWhitespaceHandling::Unspecified
347 {
348 self.whitespace = base.whitespace;
349 }
350 if self.annotation == XmlAnnotationHandling::Unspecified
351 && base.annotation != XmlAnnotationHandling::Unspecified
352 {
353 self.annotation = base.annotation;
354 }
355 if self.textprefix.is_none() && base.textprefix.is_some() {
356 self.textprefix = base.textprefix.clone();
357 }
358 if self.text.is_none() && base.text.is_some() {
359 self.text = base.text;
360 }
361 if self.textsuffix.is_none() && base.textsuffix.is_some() {
362 self.textsuffix = base.textsuffix.clone();
363 }
364 if self.id.is_none() && base.id.is_some() {
365 self.id = base.id.clone();
366 }
367 if self.stop.is_none() && base.stop.is_some() {
368 self.stop = base.stop;
369 }
370 for annotationdata in base.annotationdata.iter() {
371 if !self.annotationdata.contains(annotationdata) {
372 self.annotationdata.push(annotationdata.clone());
373 }
374 }
375 if self.annotatetextsuffix.is_empty() && !base.annotatetextsuffix.is_empty() {
376 self.annotatetextsuffix = base.annotatetextsuffix.clone();
377 }
378 if self.annotatetextprefix.is_empty() && !base.annotatetextprefix.is_empty() {
379 self.annotatetextprefix = base.annotatetextprefix.clone();
380 }
381 if self.include_textsuffix.is_none() {
382 self.include_textsuffix = base.include_textsuffix;
383 }
384 if self.include_textprefix.is_none() {
385 self.include_textprefix = base.include_textprefix;
386 }
387 }
388
389
390 pub fn with_stop(mut self, stop: bool) -> Self {
392 self.stop = Some(stop);
393 self
394 }
395
396 pub fn with_whitespace(mut self, handling: XmlWhitespaceHandling) -> Self {
398 self.whitespace = handling;
399 self
400 }
401
402 pub fn with_text(mut self, text: bool) -> Self {
403 self.text = Some(text);
404 self
405 }
406
407 pub fn with_base(mut self, iter: impl Iterator<Item = impl Into<String>>) -> Self {
408 self.base = iter.into_iter().map(|s| s.into()).collect();
409 self
410 }
411
412 pub fn without_text(mut self) -> Self {
413 self.text = None;
414 self
415 }
416
417 pub fn with_annotation(mut self, annotation: XmlAnnotationHandling) -> Self {
418 self.annotation = annotation;
419 self
420 }
421
422 fn hash(&self) -> usize {
424 self.path.0.as_ptr() as usize
425 }
426}
427
428impl PartialEq for XmlElementConfig {
429 fn eq(&self, other: &Self) -> bool {
430 self.hash() == other.hash()
431 }
432}
433
434#[derive(Debug, Clone, Deserialize, PartialEq)]
435pub struct XmlAnnotationDataConfig {
436 id: Option<String>,
438 set: Option<String>,
440 key: Option<String>,
442 value: Option<toml::Value>,
444
445 #[serde(default)]
447 valuetype: Option<String>,
448
449 #[serde(default)]
451 allow_empty_value: bool,
452
453 #[serde(default)]
455 skip_if_missing: bool,
456
457
458 #[serde(default)]
460 multiple: bool,
461}
462
463impl XmlAnnotationDataConfig {
464 pub fn with_id(mut self, id: impl Into<String>) -> Self {
465 self.id = Some(id.into());
466 self
467 }
468
469 pub fn with_set(mut self, set: impl Into<String>) -> Self {
470 self.set = Some(set.into());
471 self
472 }
473
474 pub fn with_key(mut self, key: impl Into<String>) -> Self {
475 self.key = Some(key.into());
476 self
477 }
478
479 pub fn with_value(mut self, value: impl Into<toml::Value>) -> Self {
480 self.value = Some(value.into());
481 self
482 }
483}
484
485#[derive(Debug, Clone, PartialEq, Deserialize)]
487struct XPathExpression(String);
488
489impl XPathExpression {
490 pub fn new(expression: impl Into<String>) -> Self {
491 Self(expression.into())
492 }
493
494 pub fn any() -> Self {
495 Self("*".into())
496 }
497
498 pub fn iter<'a>(
499 &'a self,
500 config: &'a XmlConversionConfig,
501 ) -> impl Iterator<Item = (Option<&'a str>, &'a str, Option<&'a str>)> {
502 self.0.trim_start_matches('/').split("/").map(|segment| {
503 let (prefix, name, condition) = Self::parse_segment(segment);
505 let namespace = if let Some(prefix) = prefix {
506 if let Some(namespace) = config.namespaces.get(prefix).map(|x| x.as_str()) {
507 Some(namespace)
508 } else {
509 panic!(
510 "XML namespace prefix not known in configuration: {}",
511 prefix
512 );
513 }
514 } else {
515 None
516 };
517 (namespace, name, condition)
518 })
519 }
520
521 fn test<'a, 'b>(&self, path: &NodePath<'a, 'b>, node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
523 let refiter = self.iter(config).collect::<Vec<_>>().into_iter().rev();
524 let pathiter = path.components.iter().rev();
525 self.test_withiter(refiter, pathiter, node, config)
526 }
527
528 fn test_withiter<'a, 'b>(&self, mut refiter: impl Iterator<Item=(Option<&'a str>, &'a str, Option<&'a str>)> + Clone, mut pathiter: impl Iterator<Item=&'a NodePathComponent<'a, 'b>> + Clone, mut node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
530 while let Some((refns, refname, condition)) = refiter.next() {
531 if refns.is_none() && refname == "" && condition.is_none() {
532 if self.test_withiter(refiter.clone(), pathiter.clone(), node, config) {
534 return true;
535 }
536 }
537 if let Some(component) = pathiter.next() {
538 if refname != "" && refname != "*" {
542 if refns.is_none() != component.namespace.is_none() || component.namespace != refns || refname != component.tagname {
543 return false;
544 }
545 }
546 if let Some(condition) = condition {
547 if !self.test_condition(condition, node, config) {
548 return false;
549 }
550 }
551 if let Some(parent) = node.parent() {
552 node = parent;
553 }
554 } else {
555 if refname != "" {
556 return false;
557 }
558 }
559 }
560 true
564 }
565
566
567 fn test_condition<'a,'b>(&self, condition: &'a str, node: Node<'a,'b>, config: &XmlConversionConfig) -> bool {
568 for condition in condition.split(" and ") { if let Some(pos) = condition.find("!=") {
570 let var = &condition[..pos];
571 let right = condition[pos+2..].trim_matches('"');
572 if self.get_var(var, &node, config) == Some(right) {
573 return false;
574 }
575 } else if let Some(pos) = condition.find("=") {
576 let var = &condition[..pos];
577 let right = condition[pos+1..].trim_matches('"');
578 let value = self.get_var(var, &node, config);
579 if value != Some(right) {
580 return false;
581 }
582 } else {
583 let v = self.get_var(condition, &node, config);
585 if v.is_none() || v == Some("") {
586 return false;
587 }
588 }
589 }
590 true
594 }
595
596 fn get_var<'a,'b>(&self, var: &str, node: &Node<'a,'b>, config: &XmlConversionConfig) -> Option<&'a str> {
598 if var.starts_with("@") {
599 if let Some(pos) = var.find(":") {
600 let prefix = &var[1..pos];
601 if let Some(ns) = config.namespaces.get(prefix) {
602 let var = &var[pos+1..];
603 node.attribute((ns.as_str(),var))
604 } else {
605 None
606 }
607 } else {
608 node.attribute(&var[1..])
609 }
610 } else if var == "text()" {
611 node.text().map(|s|s.trim())
612 } else {
613 None
614 }
615 }
616
617 fn parse_segment<'a>(s: &'a str) -> (Option<&'a str>, &'a str, Option<&'a str>) {
619 let (name, condition) = if let (Some(begin), Some(end)) = (s.find("["), s.rfind("]")) {
620 (&s[..begin], Some(&s[begin + 1..end]))
621 } else {
622 (s, None)
623 };
624 if let Some((prefix, name)) = name.split_once(":") {
625 (Some(prefix), name, condition)
626 } else {
627 (None, name, condition)
628 }
629 }
630}
631
632
633
634impl Default for XPathExpression {
635 fn default() -> Self {
636 Self::any()
637 }
638}
639
/// One step of a [`NodePath`]: an element name with optional namespace and an
/// optional index.
#[derive(Clone, Debug, PartialEq)]
struct NodePathComponent<'a, 'b> {
    /// Namespace of the element, if it has one.
    namespace: Option<&'a str>,
    /// Local tag name of the element.
    tagname: &'b str,
    /// Optional index, rendered as `[index]` when the path is formatted.
    index: Option<usize>,
}

/// The chain of elements leading to a node, outermost element first.
#[derive(Clone, Debug, Default, PartialEq)]
struct NodePath<'a, 'b> {
    components: Vec<NodePathComponent<'a, 'b>>,
}

/// Formats the path as `/{namespace}tag[index]/...`, leaving out the
/// namespace and index parts of components that have none. An empty path is
/// formatted as the empty string.
impl<'a, 'b> Display for NodePath<'a, 'b> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for component in &self.components {
            f.write_str("/")?;
            if let Some(ns) = component.namespace {
                write!(f, "{{{}}}", ns)?;
            }
            f.write_str(component.tagname)?;
            if let Some(index) = component.index {
                write!(f, "[{}]", index)?;
            }
        }
        Ok(())
    }
}
674
675impl<'a,'b> NodePath<'a,'b> {
676 fn add(&mut self, node: &Node<'a,'b>, index: Option<usize>) {
677 if node.tag_name().name() != "" {
678 self.components.push(
679 NodePathComponent {
680 namespace: node.tag_name().namespace(),
681 tagname: node.tag_name().name(),
682 index,
683 }
684 )
685 }
686 }
687
688 fn format_as_xpath(&self, prefixes: &HashMap<String, String>) -> String {
689 let mut out = String::new();
690 for component in self.components.iter() {
691 out.push('/');
692 if let Some(ns) = component.namespace {
693 if let Some(prefix) = prefixes.get(ns) {
694 if let Some(index) = component.index {
695 out += &format!("{}:{}[{}]", prefix, component.tagname, index);
696 } else {
697 out += &format!("{}:{}", prefix, component.tagname);
698 }
699 } else {
700 eprintln!("STAM fromxml WARNING: format_as_xpath: namespace {} not defined, no prefix found!", ns);
701 if let Some(index) = component.index {
702 out += &format!("{}[{}]", component.tagname, index);
703 } else {
704 out += &format!("{}", component.tagname);
705 }
706 }
707 } else {
708 if let Some(index) = component.index {
709 out += &format!("{}[{}]", component.tagname, index);
710 } else {
711 out += &format!("{}", component.tagname);
712 }
713 }
714 }
715 out
716 }
717}
718
719
/// Counts how many times each tag name has been seen; the keys are the
/// debug representations of the tag names.
#[derive(Debug, Default)]
struct SiblingCounter {
    map: HashMap<String, usize>,
}
725
726impl SiblingCounter {
727 fn count<'a,'b>(&mut self, node: &Node<'a,'b>) -> usize {
728 let s = format!("{:?}", node.tag_name());
729 *self.map.entry(s).and_modify(|c| {*c += 1;}).or_insert(1)
730 }
731}
732
733
734#[derive(Debug, Clone, Deserialize)]
735pub struct MetadataConfig {
737 #[serde(default)]
739 annotation: XmlAnnotationHandling,
740
741 #[serde(default)]
742 annotationdata: Vec<XmlAnnotationDataConfig>,
743
744 #[serde(default)]
746 id: Option<String>,
747}
748
749pub fn from_xml<'a>(
751 filename: &Path,
752 config: &XmlConversionConfig,
753 store: &'a mut AnnotationStore,
754) -> Result<(), String> {
755 if config.debug {
756 eprintln!("[STAM fromxml] parsing {}", filename.display());
757 }
758
759 let mut xmlstring = read_to_string(filename)
761 .map_err(|e| format!("Error opening XML file {}: {}", filename.display(), e))?;
762
763 if xmlstring[..100].find("<!DOCTYPE html>").is_some() && config.inject_dtd.is_some() {
765 xmlstring = xmlstring.replacen("<!DOCTYPE html>", "", 1);
766 }
767
768 if xmlstring[..100].find("<!DOCTYPE").is_none() {
770 if let Some(dtd) = config.inject_dtd.as_ref() {
771 xmlstring = dtd.to_string() + &xmlstring
772 };
773 } else if config.inject_dtd.is_some() {
774 eprintln!("[STAM fromxml] WARNING: Can not inject DTD because file already has a DOCTYPE");
775 }
776
777 let doc = Document::parse_with_options(
779 &xmlstring,
780 ParsingOptions {
781 allow_dtd: true,
782 ..ParsingOptions::default()
783 },
784 )
785 .map_err(|e| format!("Error parsing XML file {}: {}", filename.display(), e))?;
786
787 let mut converter = XmlToStamConverter::new(config);
788 converter
789 .compile()
790 .map_err(|e| format!("Error compiling templates: {}", e))?;
791
792 let textoutfilename = format!(
793 "{}.txt",
794 filename
795 .file_stem()
796 .expect("invalid filename")
797 .to_str()
798 .expect("invalid utf-8 in filename")
799 );
800
801 let mut path = NodePath::default();
803 path.add(&doc.root_element(), None);
804 converter
805 .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(textoutfilename.as_str()), Some(&filename.to_string_lossy()), 0)
806 .map_err(|e| {
807 format!(
808 "Error extracting element text from {}: {}",
809 filename.display(),
810 e
811 )
812 })?;
813 if config.debug {
814 eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
815 }
816 let resource = TextResourceBuilder::new()
817 .with_id(filename_to_id(textoutfilename.as_str(), config).to_string())
818 .with_text(converter.text.clone())
819 .with_filename(&textoutfilename);
820
821 converter.resource_handle = Some(
822 store
823 .add_resource(resource)
824 .map_err(|e| format!("Failed to add resource {}: {}", &textoutfilename, e))?,
825 );
826
827 converter.add_metadata(store).map_err(|e| format!("Failed to add metadata {}: {}", &textoutfilename, e))?;
828
829 converter
831 .extract_element_annotation(doc.root_element(), &path, Some(&filename.to_string_lossy()),0, store)
832 .map_err(|e| {
833 format!(
834 "Error extracting element annotation from {}: {}",
835 filename.display(),
836 e
837 )
838 })?;
839
840 Ok(())
841}
842
843pub fn from_multi_xml<'a>(
845 filenames: &Vec<&Path>,
846 outputfile: Option<&Path>,
847 config: &XmlConversionConfig,
848 store: &'a mut AnnotationStore,
849) -> Result<(), String> {
850
851 let textoutfilename = if let Some(outputfile) = outputfile {
852 format!("{}",outputfile.to_str().expect("invalid utf-8 in filename"))
853 } else {
854 format!(
855 "{}.txt",
856 filenames.iter().next().expect("1 or more filename need to be provided")
857 .file_stem()
858 .expect("invalid filename")
859 .to_str()
860 .expect("invalid utf-8 in filename")
861 )
862 };
863
864 let mut xmlstrings: Vec<String> = Vec::new();
866 let mut docs: Vec<Document> = Vec::new();
867 for filename in filenames.iter() {
868 if config.debug {
869 eprintln!("[STAM fromxml] parsing {} (one of multiple)", filename.display());
870 }
871 let mut xmlstring = read_to_string(filename).map_err(|e| format!("Error opening XML file {}: {}", filename.display(), e))?;
873 if xmlstring[..100].find("<!DOCTYPE html>").is_some() && config.inject_dtd.is_some() {
874 xmlstring = xmlstring.replacen("<!DOCTYPE html>", "", 1);
875 }
876 if xmlstring[..100].find("<!DOCTYPE").is_none() {
878 if let Some(dtd) = config.inject_dtd.as_ref() {
879 xmlstring = dtd.to_string() + &xmlstring
880 };
881 } else if config.inject_dtd.is_some() {
882 eprintln!("[STAM fromxml] WARNING: Can not inject DTD because file already has a DOCTYPE");
883 }
884 xmlstrings.push(xmlstring);
885 }
886
887 for (filename, xmlstring) in filenames.iter().zip(xmlstrings.iter()) {
888 let doc = Document::parse_with_options(
890 xmlstring,
891 ParsingOptions {
892 allow_dtd: true,
893 ..ParsingOptions::default()
894 },
895 )
896 .map_err(|e| format!("Error parsing XML file {}: {}", filename.display(), e))?;
897 docs.push(doc);
898 }
899
900 let mut converter = XmlToStamConverter::new(config);
901 converter
902 .compile()
903 .map_err(|e| format!("Error compiling templates: {}", e))?;
904
905 for (i, (doc, filename)) in docs.iter().zip(filenames.iter()).enumerate() {
906 let mut path = NodePath::default();
907 path.add(&doc.root_element(), None);
908 converter
910 .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(textoutfilename.as_str()), Some(&filename.to_string_lossy()), i)
911 .map_err(|e| {
912 format!(
913 "Error extracting element text from {}: {}",
914 filename.display(),
915 e
916 )
917 })?;
918 if config.debug {
919 eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
920 }
921 }
922
923 let resource = TextResourceBuilder::new()
924 .with_id(filename_to_id(textoutfilename.as_str(), config).to_string())
925 .with_text(converter.text.clone())
926 .with_filename(&textoutfilename);
927
928 converter.resource_handle = Some(
929 store
930 .add_resource(resource)
931 .map_err(|e| format!("Failed to add resource {}: {}", &textoutfilename, e))?,
932 );
933
934 converter.add_metadata(store).map_err(|e| format!("Failed to add metadata {}: {}", &textoutfilename, e))?;
935
936 for (i,(doc, filename)) in docs.iter().zip(filenames.iter()).enumerate() {
938 let mut path = NodePath::default();
939 path.add(&doc.root_element(), None);
940 converter
941 .extract_element_annotation(doc.root_element(), &path, Some(&filename.to_string_lossy()),i, store)
942 .map_err(|e| {
943 format!(
944 "Error extracting element annotation from {}: {}",
945 filename.display(),
946 e
947 )
948 })?;
949 }
950
951 Ok(())
952}
953
954pub fn from_xml_in_memory<'a>(
956 resource_id: &str,
957 xmlstring: &str,
958 config: &XmlConversionConfig,
959 store: &'a mut AnnotationStore,
960) -> Result<(), String> {
961 if config.debug {
962 eprintln!("[STAM fromxml] parsing XML string");
963 }
964
965 let doc = Document::parse_with_options(
967 &xmlstring,
968 ParsingOptions {
969 allow_dtd: true,
970 ..ParsingOptions::default()
971 },
972 )
973 .map_err(|e| format!("Error parsing XML string: {}", e))?;
974
975 let mut converter = XmlToStamConverter::new(config);
976 converter
977 .compile()
978 .map_err(|e| format!("Error compiling templates: {}", e))?;
979
980 let mut path = NodePath::default();
981 path.add(&doc.root_element(), None);
982 converter
984 .extract_element_text(doc.root_element(), &path, converter.config.whitespace, Some(resource_id), Some(resource_id), 0)
985 .map_err(|e| {
986 format!(
987 "Error extracting element text from {}: {}",
988 resource_id,
989 e
990 )
991 })?;
992 if config.debug {
993 eprintln!("[STAM fromxml] extracted full text: {}", &converter.text);
994 }
995 let resource = TextResourceBuilder::new()
996 .with_id(resource_id)
997 .with_text(converter.text.clone());
998
999 converter.resource_handle = Some(
1000 store
1001 .add_resource(resource)
1002 .map_err(|e| format!("Failed to add resource {}: {}", &resource_id, e))?,
1003 );
1004
1005 converter.add_metadata(store).map_err(|e| format!("Failed to add metadata for {}: {}", &resource_id, e))?;
1006
1007 converter
1009 .extract_element_annotation(doc.root_element(), &path, Some(resource_id), 0, store)
1010 .map_err(|e| {
1011 format!(
1012 "Error extracting element annotation from {}: {}",
1013 resource_id,
1014 e
1015 )
1016 })?;
1017
1018 Ok(())
1019}
1020
1021pub fn filename_to_id<'a>(filename: &'a str, config: &XmlConversionConfig) -> &'a str {
1022 for suffix in config.id_strip_suffix.iter() {
1023 if filename.ends_with(suffix) {
1024 return &filename[..filename.len() - suffix.len()];
1025 }
1026 }
1027 return filename;
1028}
1029
/// Distinguishes the positions that are recorded per node: the body, the text
/// prefix and the text suffix.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum PositionType {
    Body,
    TextPrefix,
    TextSuffix,
}
1036
1037struct XmlToStamConverter<'a> {
1038 cursor: usize,
1040
1041 text: String,
1043
1044 template_engine: Engine<'a>,
1046
1047 positionmap: HashMap<(usize,NodeId,PositionType), Offset>,
1049
1050 bytepositionmap: HashMap<(usize,NodeId,PositionType), (usize, usize)>,
1052
1053 markers: HashMap<usize, Vec<(usize,NodeId)>>,
1055
1056 scopes: HashMap<String, (usize,NodeId)>,
1058
1059 resource_handle: Option<TextResourceHandle>,
1061
1062 pending_whitespace: bool,
1064
1065 config: &'a XmlConversionConfig,
1067
1068 prefixes: HashMap<String, String>,
1070
1071 global_context: BTreeMap<String, upon::Value>,
1073
1074 variables: BTreeMap<String, BTreeSet<&'a str>>,
1076
1077 debugindent: String,
1078}
1079
1080pub enum XmlConversionError {
1081 StamError(StamError),
1082 TemplateError(String, Option<upon::Error>),
1083 ConfigError(String),
1084}
1085
1086impl From<StamError> for XmlConversionError {
1087 fn from(error: StamError) -> Self {
1088 Self::StamError(error)
1089 }
1090}
1091
1092impl From<upon::Error> for XmlConversionError {
1093 fn from(error: upon::Error) -> Self {
1094 Self::TemplateError("".into(), Some(error))
1095 }
1096}
1097
1098impl Display for XmlConversionError {
1099 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1100 match self {
1101 Self::StamError(e) => e.fmt(f),
1102 Self::TemplateError(s, e) => {
1103 f.write_str(s.as_str())?;
1104 f.write_str(": ")?;
1105 if let Some(e) = e {
1106 e.fmt(f)?;
1107 }
1108 f.write_str("")
1109 }
1110 Self::ConfigError(e) => e.fmt(f),
1111 }
1112 }
1113}
1114
1115impl<'a> XmlToStamConverter<'a> {
1116 fn new(config: &'a XmlConversionConfig) -> Self {
1117 let mut prefixes: HashMap<String, String> = HashMap::new();
1118 for (prefix, namespace) in config.namespaces.iter() {
1119 prefixes.insert(namespace.to_string(), prefix.to_string());
1120 }
1121 let mut template_engine = Engine::new();
1122 template_engine.set_default_formatter(&value_formatter); template_engine.add_function("capitalize", filter_capitalize);
1124 template_engine.add_function("lower", str::to_lowercase);
1125 template_engine.add_function("upper", str::to_uppercase);
1126 template_engine.add_function("trim", |s: &str| s.trim().to_string() );
1127 template_engine.add_function("add", filter_add);
1128 template_engine.add_function("sub", filter_sub);
1129 template_engine.add_function("mul", filter_mul);
1130 template_engine.add_function("div", filter_div);
1131 template_engine.add_function("eq", |a: &upon::Value, b: &upon::Value| a == b);
1132 template_engine.add_function("ne", |a: &upon::Value, b: &upon::Value| a != b);
1133 template_engine.add_function("gt", filter_gt);
1134 template_engine.add_function("lt", filter_lt);
1135 template_engine.add_function("gte", filter_gte);
1136 template_engine.add_function("lte", filter_lte);
1137 template_engine.add_function("int", |a: &upon::Value| match a {
1138 upon::Value::Integer(x) => upon::Value::Integer(*x),
1139 upon::Value::Float(x) => upon::Value::Integer(*x as i64),
1140 upon::Value::String(s) => upon::Value::Integer(s.parse().expect("int filter expects an integer value")),
1141 _ => panic!("int filter expects an integer value"), });
1143 template_engine.add_function("float", |a: &upon::Value| match a {
1144 upon::Value::Float(_) => a.clone(),
1145 upon::Value::Integer(x) => upon::Value::Float(*x as f64),
1146 upon::Value::String(s) => upon::Value::Float(s.parse().expect("float filter expects a float value")),
1147 _ => panic!("int filter expects an integer value"), });
1149 template_engine.add_function("str", |a: upon::Value| match a {
1150 upon::Value::Integer(x) => upon::Value::String(format!("{}",x)),
1151 upon::Value::Float(x) => upon::Value::String(format!("{}",x)),
1152 upon::Value::Bool(x) => upon::Value::String(format!("{}",x)),
1153 upon::Value::String(_) => a,
1154 upon::Value::None => upon::Value::String(String::new()),
1155 upon::Value::List(list) => { let newlist: Vec<String> = list.iter().map(|v| match v {
1157 upon::Value::String(s) => s.clone(),
1158 upon::Value::Integer(d) => format!("{}",d),
1159 upon::Value::Float(d) => format!("{}",d),
1160 upon::Value::Bool(d) => format!("{}",d),
1161 _ => String::new(),
1162 }).collect();
1163 upon::Value::String(newlist.join(", "))
1164 },
1165 _ => panic!("map to string not implemented"), });
1167 template_engine.add_function("as_range", |a: i64| upon::Value::List(std::ops::Range { start: 0, end: a }.into_iter().map(|x| upon::Value::Integer(x+1)).collect::<Vec<_>>()) );
1168 template_engine.add_function("last", |list: &[upon::Value]| list.last().map(Clone::clone));
1169 template_engine.add_function("first", |list: &[upon::Value]| {
1170 list.first().map(Clone::clone)
1171 });
1172 template_engine.add_function("tokenize", |s: &str| {
1173 upon::Value::List(
1174 s.split(|c| c == ' ' || c == '\n').filter_map(|x|
1175 if !x.is_empty() {
1176 Some(upon::Value::String(x.to_string()))
1177 } else {
1178 None
1179 }
1180 )
1181 .collect::<Vec<upon::Value>>())
1182 });
1183 template_engine.add_function("replace", |s: &str, from: &str, to: &str| {
1184 upon::Value::String(s.replace(from,to))
1185 });
1186 template_engine.add_function("starts_with", |s: &str, prefix: &str| {
1187 s.starts_with(prefix)
1188 });
1189 template_engine.add_function("ends_with", |s: &str, suffix: &str| {
1190 s.ends_with(suffix)
1191 });
1192 template_engine.add_function("basename", |a: &upon::Value| match a {
1193 upon::Value::String(s) => upon::Value::String(s.split(|c| c == '/' || c == '\\').last().expect("splitting must work").to_string()),
1194 _ => panic!("basename filter expects a string value"), });
1196 template_engine.add_function("noext", |a: &upon::Value| match a {
1197 upon::Value::String(s) => if let Some(pos) = s.rfind('.') {
1198 s[..pos].to_string()
1199 } else {
1200 s.to_string()
1201 },
1202 _ => panic!("basename filter expects a string value"), });
1204 template_engine.add_function("join", |list: &upon::Value, delimiter: &str| match list {
1205 upon::Value::List(list) => { let newlist: Vec<String> = list.iter().map(|v| match v {
1207 upon::Value::String(s) => s.clone(),
1208 upon::Value::Integer(d) => format!("{}",d),
1209 upon::Value::Float(d) => format!("{}",d),
1210 upon::Value::Bool(d) => format!("{}",d),
1211 _ => String::new(),
1212 }).collect();
1213 upon::Value::String(newlist.join(delimiter))
1214 },
1215 _ => {
1216 list.clone() }
1218 });
1219 let mut converter = Self {
1220 cursor: 0,
1221 text: String::new(),
1222 template_engine,
1223 positionmap: HashMap::new(),
1224 bytepositionmap: HashMap::new(),
1225 scopes: HashMap::new(),
1226 markers: HashMap::new(),
1227 resource_handle: None,
1228 pending_whitespace: false,
1229 global_context: BTreeMap::new(),
1230 debugindent: String::new(),
1231 variables: BTreeMap::new(),
1232 prefixes,
1233 config,
1234 };
1235 converter.set_global_context();
1236 converter.add_external_filters();
1237 converter
1238 }
1239
1240 fn add_external_filters(&mut self) {
1241 for filter in self.config.external_filters.clone() {
1242 self.template_engine.add_function(filter.name.clone(), move |value: &upon::Value| filter.run(value) );
1243 }
1244 }
1245
1246 fn compile(&mut self) -> Result<(), XmlConversionError> {
1248 if self.config.debug {
1249 eprintln!("[STAM fromxml] compiling templates");
1250 }
1251 for element in self.config.elements.iter() {
1252 if let Some(textprefix) = element.textprefix.as_ref() {
1253 if self.template_engine.get_template(textprefix.as_str()).is_none() {
1254 let template = self.precompile(textprefix.as_str());
1255 self.template_engine
1256 .add_template(textprefix.clone(), template)
1257 .map_err(|e| {
1258 XmlConversionError::TemplateError(
1259 format!("element/textprefix template {}", textprefix.clone()),
1260 Some(e),
1261 )
1262 })?;
1263 }
1264 }
1265 if let Some(textsuffix) = element.textsuffix.as_ref() {
1266 if self.template_engine.get_template(textsuffix.as_str()).is_none() {
1267 let template = self.precompile(textsuffix.as_str());
1268 self.template_engine
1269 .add_template(textsuffix.clone(), template)
1270 .map_err(|e| {
1271 XmlConversionError::TemplateError(
1272 format!("element/textsuffix template {}", textsuffix.clone()),
1273 Some(e),
1274 )
1275 })?;
1276 }
1277 }
1278 if let Some(id) = element.id.as_ref() {
1279 if self.template_engine.get_template(id.as_str()).is_none() {
1280 let template = self.precompile(id.as_str());
1281 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1282 XmlConversionError::TemplateError(
1283 format!("element/id template {}", id.clone()),
1284 Some(e),
1285 )
1286 })?;
1287 }
1288 }
1289 for annotationdata in element.annotationdata.iter().chain(element.annotatetextprefix.iter()).chain(element.annotatetextsuffix.iter()) {
1290 if let Some(id) = annotationdata.id.as_ref() {
1291 if self.template_engine.get_template(id.as_str()).is_none() {
1292 let template = self.precompile(id.as_str());
1293 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1294 XmlConversionError::TemplateError(
1295 format!("annotationdata/id template {}", id.clone()),
1296 Some(e),
1297 )
1298 })?;
1299 }
1300 }
1301 if let Some(set) = annotationdata.set.as_ref() {
1302 if self.template_engine.get_template(set.as_str()).is_none() {
1303 let template = self.precompile(set.as_str());
1304 self.template_engine.add_template(set.clone(), template).map_err(|e| {
1306 XmlConversionError::TemplateError(
1307 format!("annotationdata/set template {}", set.clone()),
1308 Some(e),
1309 )
1310 })?;
1311 }
1312 }
1313 if let Some(key) = annotationdata.key.as_ref() {
1314 if self.template_engine.get_template(key.as_str()).is_none() {
1315 let template = self.precompile(key.as_str());
1316 self.template_engine.add_template(key.clone(), template).map_err(|e| {
1317 XmlConversionError::TemplateError(
1318 format!("annotationdata/key template {}", key.clone()),
1319 Some(e),
1320 )
1321 })?;
1322 }
1323 }
1324 if let Some(value) = annotationdata.value.as_ref() {
1325 self.compile_value(value)?;
1326 }
1327 }
1328 }
1329 for metadata in self.config.metadata.iter() {
1330 if let Some(id) = metadata.id.as_ref() {
1331 if self.template_engine.get_template(id.as_str()).is_none() {
1332 let template = self.precompile(id.as_str());
1333 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1334 XmlConversionError::TemplateError(
1335 format!("metadata/id template {}", id.clone()),
1336 Some(e),
1337 )
1338 })?;
1339 }
1340 }
1341 for annotationdata in metadata.annotationdata.iter() {
1342 if let Some(id) = annotationdata.id.as_ref() {
1343 if self.template_engine.get_template(id.as_str()).is_none() {
1344 let template = self.precompile(id.as_str());
1345 self.template_engine.add_template(id.clone(), template).map_err(|e| {
1346 XmlConversionError::TemplateError(
1347 format!("annotationdata/id template {}", id.clone()),
1348 Some(e),
1349 )
1350 })?;
1351 }
1352 }
1353 if let Some(set) = annotationdata.set.as_ref() {
1354 if self.template_engine.get_template(set.as_str()).is_none() {
1355 let template = self.precompile(set.as_str());
1356 self.template_engine.add_template(set.clone(), template).map_err(|e| {
1358 XmlConversionError::TemplateError(
1359 format!("annotationdata/set template {}", set.clone()),
1360 Some(e),
1361 )
1362 })?;
1363 }
1364 }
1365 if let Some(key) = annotationdata.key.as_ref() {
1366 if self.template_engine.get_template(key.as_str()).is_none() {
1367 let template = self.precompile(key.as_str());
1368 self.template_engine.add_template(key.clone(), template).map_err(|e| {
1369 XmlConversionError::TemplateError(
1370 format!("annotationdata/key template {}", key.clone()),
1371 Some(e),
1372 )
1373 })?;
1374 }
1375 }
1376 if let Some(value) = annotationdata.value.as_ref() {
1377 self.compile_value(value)?;
1378 }
1379 }
1380 }
1381 Ok(())
1382 }
1383
1384 fn compile_value(&mut self, value: &'a toml::Value) -> Result<(), XmlConversionError> {
1386 match value {
1387 toml::Value::String(value) => {
1388 if self.template_engine.get_template(value.as_str()).is_none() {
1389 let template = self.precompile(value.as_str());
1390 self.template_engine.add_template(value.clone(), template).map_err(|e| {
1391 XmlConversionError::TemplateError(
1392 format!("annotationdata/value template {}", value.clone()),
1393 Some(e),
1394 )
1395 })?;
1396 }
1397 }
1398 toml::Value::Table(map) => {
1399 for (_key, value) in map.iter() {
1400 self.compile_value(value)?;
1401 }
1402 },
1403 toml::Value::Array(list) => {
1404 for value in list.iter() {
1405 self.compile_value(value)?;
1406 }
1407 }
1408 _ => {} }
1410 Ok(())
1411 }
1412
1413 fn extract_element_text<'b>(
1418 &mut self,
1419 node: Node<'a,'b>,
1420 path: &NodePath<'a,'b>,
1421 whitespace: XmlWhitespaceHandling,
1422 resource_id: Option<&str>,
1423 inputfile: Option<&str>,
1424 doc_num: usize,
1425 ) -> Result<(), XmlConversionError> {
1426 if self.config.debug {
1427 eprintln!("[STAM fromxml]{} extracting text for element {}", self.debugindent, path);
1428 }
1429 let mut begin = self.cursor; let mut bytebegin = self.text.len(); let mut end_discount = 0; let mut end_bytediscount = 0;
1433 let mut firsttext = true; let mut elder_siblings = SiblingCounter::default();
1436
1437 if let Some(element_config) = self.config.element_config(node, path) {
1439 if self.config.debug {
1440 eprintln!("[STAM fromxml]{} matching config: {:?}", self.debugindent, element_config);
1441 }
1442
1443 if (element_config.stop == Some(false) || element_config.stop.is_none())
1444 && element_config.annotation != XmlAnnotationHandling::TextSelectorBetweenMarkers
1445 {
1446 let whitespace = if node.has_attribute((NS_XML, "space")) {
1449 match node.attribute((NS_XML, "space")).unwrap() {
1451 "preserve" => XmlWhitespaceHandling::Preserve,
1452 "collapse" | "replace" => XmlWhitespaceHandling::Collapse,
1453 _ => whitespace,
1454 }
1455 } else if element_config.whitespace == XmlWhitespaceHandling::Inherit
1456 || element_config.whitespace == XmlWhitespaceHandling::Unspecified
1457 {
1458 whitespace } else {
1460 element_config.whitespace };
1462
1463 self.process_textprefix(element_config, node, resource_id, inputfile, doc_num, &mut begin, &mut bytebegin)?;
1465
1466 let textbegin = self.cursor;
1467 for child in node.children() {
1469 if self.config.debug {
1470 eprintln!("[STAM fromxml]{} child {:?}", self.debugindent,child);
1471 eprintln!("[STAM fromxml]{} cursor={} begin={} textbegin={}", self.debugindent, self.cursor, begin, textbegin);
1472 }
1473 if child.is_text() && element_config.text == Some(true) {
1474 let mut innertext = child.text().expect("text node must have text");
1478 let mut pending_whitespace = false;
1479 let mut leading_whitespace = false;
1480 if whitespace == XmlWhitespaceHandling::Collapse && !innertext.is_empty() {
1481 let mut all_whitespace = true;
1483 leading_whitespace = innertext.chars().next().unwrap().is_whitespace();
1484
1485 pending_whitespace = innertext
1489 .chars()
1490 .inspect(|c| {
1491 if !c.is_whitespace() {
1492 all_whitespace = false
1493 }
1494 })
1495 .last()
1496 .unwrap()
1497 .is_whitespace();
1498 if all_whitespace {
1499 self.pending_whitespace = true;
1500 if self.config.debug {
1501 eprintln!(
1502 "[STAM fromxml]{} ^- all whitespace, flag pending whitespace and skipping...",
1503 self.debugindent,
1504 );
1505 }
1506 continue;
1507 }
1508 innertext = innertext.trim();
1509 if self.config.debug {
1510 eprintln!(
1511 "[STAM fromxml]{} ^- collapsed whitespace: {:?}",
1512 self.debugindent,
1513 innertext
1514 );
1515 }
1516 }
1517 if self.pending_whitespace || leading_whitespace {
1518 if !self.text.is_empty()
1520 && !self.text.chars().rev().next().unwrap().is_whitespace()
1521 {
1522 if self.config.debug {
1523 eprintln!("[STAM fromxml]{} ^- outputting pending whitespace",self.debugindent);
1524 }
1525 self.text.push(' ');
1526 self.cursor += 1;
1527 if firsttext && self.pending_whitespace {
1528 begin += 1;
1529 bytebegin += 1;
1530 if self.config.debug {
1531 eprintln!("[STAM fromxml]{} firsttext, begin is now {}, cursor {}",self.debugindent, begin, self.cursor);
1532 }
1533 }
1534 }
1535 self.pending_whitespace = false;
1536 }
1537
1538 if whitespace == XmlWhitespaceHandling::Collapse {
1540 let mut prevc = ' ';
1541 let mut innertext = innertext.replace(|c: char| c.is_whitespace(), " ");
1542 innertext.retain(|c| {
1543 let do_retain = c != ' ' || prevc != ' ';
1544 prevc = c;
1545 do_retain
1546 });
1547 self.text += &innertext;
1548 self.cursor += innertext.chars().count();
1549 if self.config.debug {
1550 eprintln!("[STAM fromxml]{} ^- outputting text child (collapsed whitespace), cursor is now {}: {}",self.debugindent, self.cursor, innertext);
1551 }
1552 } else {
1553 self.text += &innertext;
1554 self.cursor += innertext.chars().count();
1555 if self.config.debug {
1556 eprintln!("[STAM fromxml]{} ^- outputting text child, cursor is now {}: {}",self.debugindent, self.cursor, innertext);
1557 }
1558 }
1559 firsttext = self.cursor == textbegin;
1560
1561 self.pending_whitespace = pending_whitespace;
1563 } else if child.is_element() {
1564 if self.config.debug {
1565 eprintln!("[STAM fromxml]{} \\- extracting text for this child", self.debugindent);
1566 }
1567 self.debugindent.push_str(" ");
1568 let mut path = path.clone();
1570 let count = elder_siblings.count(&child);
1571 path.add(&child, Some(count));
1572 self.extract_element_text(child, &path, whitespace, resource_id, inputfile, doc_num)?;
1573 firsttext = self.cursor == textbegin;
1574 self.debugindent.pop();
1575 self.debugindent.pop();
1576 } else {
1577 if self.config.debug {
1578 eprintln!("[STAM fromxml]{} ^- skipping this child node", self.debugindent);
1579 }
1580 continue;
1581 }
1582 }
1583
1584 self.process_textsuffix(element_config, node, resource_id, inputfile, doc_num, &mut end_discount, &mut end_bytediscount, textbegin)?;
1586
1587 if let Some(scope_id) = element_config.scope_id.as_ref() {
1589 self.scopes.insert( scope_id.clone(), (doc_num, node.id()) );
1590 }
1591 } else if element_config.annotation == XmlAnnotationHandling::TextSelectorBetweenMarkers
1592 {
1593 if self.config.debug {
1595 eprintln!("[STAM fromxml]{} adding to markers (textprefix={:?}, textsuffix={:?})", self.debugindent, element_config.textprefix, element_config.textsuffix);
1596 }
1597
1598
1599 self.markers
1600 .entry(element_config.hash())
1601 .and_modify(|v| v.push((doc_num, node.id())))
1602 .or_insert(vec![(doc_num, node.id())]);
1603
1604 self.process_textprefix(element_config, node, resource_id, inputfile, doc_num, &mut begin, &mut bytebegin)?;
1607 self.process_textsuffix(element_config, node, resource_id, inputfile, doc_num, &mut end_discount, &mut end_bytediscount, self.cursor)?;
1608 }
1609 } else if self.config.debug {
1610 eprintln!(
1611 "[STAM fromxml]{} WARNING: no match, skipping text extraction for element {}",
1612 self.debugindent,
1613 path
1614 );
1615 }
1616
1617 if begin <= (self.cursor - end_discount) {
1621 let offset = Offset::simple(begin, self.cursor - end_discount);
1622 if self.config.debug {
1623 eprintln!(
1624 "[STAM fromxml]{} extracted text for {} @{:?}: {:?}",
1625 self.debugindent,
1626 path,
1627 &offset,
1628 &self.text[bytebegin..(self.text.len() - end_bytediscount)]
1629 );
1630 }
1631 self.positionmap.insert((doc_num, node.id(), PositionType::Body), offset);
1632 self.bytepositionmap
1633 .insert((doc_num, node.id(), PositionType::Body), (bytebegin, self.text.len() - end_bytediscount));
1634 }
1635 Ok(())
1636 }
1637
1638 fn process_textprefix<'b>(
1640 &mut self,
1641 element_config: &XmlElementConfig,
1642 node: Node<'a,'b>,
1643 resource_id: Option<&str>,
1644 inputfile: Option<&str>,
1645 doc_num: usize,
1646 begin: &mut usize,
1647 bytebegin: &mut usize
1648 ) -> Result<(), XmlConversionError> {
1649 if let Some(textprefix) = &element_config.textprefix {
1650 self.pending_whitespace = false;
1651 if self.config.debug {
1652 eprintln!("[STAM fromxml]{} outputting textprefix: {:?}", self.debugindent, textprefix);
1653 }
1654 let result =
1655 self.render_template(textprefix, &node, Some(self.cursor), None, resource_id, inputfile, doc_num)
1656 .map_err(|e| match e {
1657 XmlConversionError::TemplateError(s, e) => {
1658 XmlConversionError::TemplateError(
1659 format!(
1660 "whilst rendering textprefix template '{}' for node '{}': {}",
1661 textprefix, node.tag_name().name(), s
1662 ),
1663 e,
1664 )
1665 }
1666 e => e,
1667 })?;
1668 let result_charlen = result.chars().count();
1669
1670 if !element_config.annotatetextprefix.is_empty() {
1671 let offset = Offset::simple(self.cursor, self.cursor + result_charlen);
1673 self.positionmap.insert((doc_num, node.id(), PositionType::TextPrefix), offset);
1674 self.bytepositionmap
1675 .insert((doc_num, node.id(), PositionType::TextPrefix), (*bytebegin, *bytebegin + result.len()));
1676 }
1677
1678 self.cursor += result_charlen;
1679 self.text += &result;
1680
1681 if element_config.include_textprefix != Some(true) {
1682 *begin += result_charlen;
1684 *bytebegin += result.len();
1685 }
1686 }
1687 Ok(())
1688 }
1689
1690 fn process_textsuffix<'b>(
1692 &mut self,
1693 element_config: &XmlElementConfig,
1694 node: Node<'a,'b>,
1695 resource_id: Option<&str>,
1696 inputfile: Option<&str>,
1697 doc_num: usize,
1698 end_discount: &mut usize,
1699 end_bytediscount: &mut usize,
1700 textbegin: usize,
1701 ) -> Result<(), XmlConversionError> {
1702 if let Some(textsuffix) = &element_config.textsuffix {
1703 if self.config.debug {
1704 eprintln!("[STAM fromxml]{} outputting textsuffix: {:?}", self.debugindent, textsuffix);
1705 }
1706 let result = self.render_template(
1707 textsuffix.as_str(),
1708 &node,
1709 Some(textbegin),
1710 Some(self.cursor),
1711 resource_id,
1712 inputfile,
1713 doc_num
1714 ).map_err(|e| match e {
1715 XmlConversionError::TemplateError(s, e) => {
1716 XmlConversionError::TemplateError(
1717 format!(
1718 "whilst rendering textsuffix template '{}' for node '{}': {}",
1719 textsuffix,
1720 node.tag_name().name(),
1721 s
1722 ),
1723 e,
1724 )
1725 }
1726 e => e,
1727 })?;
1728 let end_discount_tmp = result.chars().count();
1729 let end_bytediscount_tmp = result.len();
1730
1731
1732 self.text += &result;
1733
1734 if !element_config.annotatetextsuffix.is_empty() {
1735 let offset = Offset::simple(self.cursor, self.cursor + end_discount_tmp);
1737 self.positionmap.insert((doc_num, node.id(), PositionType::TextSuffix), offset);
1738 self.bytepositionmap
1739 .insert((doc_num, node.id(), PositionType::TextSuffix), (self.text.len() - end_bytediscount_tmp, self.text.len()));
1740 }
1741
1742 self.cursor += end_discount_tmp;
1743 self.pending_whitespace = false;
1744
1745 if element_config.include_textsuffix == Some(true) {
1746 *end_discount = 0;
1748 *end_bytediscount = 0;
1749 } else {
1750 *end_discount = end_discount_tmp;
1752 *end_bytediscount = end_bytediscount_tmp;
1753 }
1754 }
1755 Ok(())
1756 }
1757
1758 fn extract_element_annotation<'b>(
1763 &mut self,
1764 node: Node<'a,'b>,
1765 path: &NodePath<'a,'b>,
1766 inputfile: Option<&str>,
1767 doc_num: usize,
1768 store: &mut AnnotationStore,
1769 ) -> Result<(), XmlConversionError> {
1770 if self.config.debug {
1771 eprintln!("[STAM fromxml]{} extracting annotation from {}", self.debugindent, path);
1772 }
1773
1774 let mut elder_siblings = SiblingCounter::default();
1775
1776 if let Some(element_config) = self.config.element_config(node, &path) {
1778 if self.config.debug {
1779 eprintln!("[STAM fromxml]{} matching config: {:?}", self.debugindent, element_config);
1780 }
1781 if element_config.annotation != XmlAnnotationHandling::None
1782 && element_config.annotation != XmlAnnotationHandling::Unspecified
1783 {
1784 let mut builder = AnnotationBuilder::new();
1785
1786 let offset = self.positionmap.get(&(doc_num, node.id(), PositionType::Body));
1788 if element_config.annotation == XmlAnnotationHandling::TextSelector {
1789 if let Some((beginbyte, endbyte)) = self.bytepositionmap.get(&(doc_num, node.id(), PositionType::Body)) {
1790 if self.config.debug {
1791 eprintln!("[STAM fromxml]{} annotation covers text {:?} (bytes {}-{})", self.debugindent, offset, beginbyte, endbyte);
1792 }
1793 } else if self.text.is_empty() {
1794 return Err(XmlConversionError::ConfigError("Can't extract annotations on text if no text was extracted!".into()));
1795 }
1796 }
1797 let begin = if let Some(offset) = offset {
1798 if let Cursor::BeginAligned(begin) = offset.begin {
1799 Some(begin)
1800 } else {
1801 None
1802 }
1803 } else {
1804 None
1805 };
1806 let end = if let Some(offset) = offset {
1807 if let Cursor::BeginAligned(end) = offset.end {
1808 Some(end)
1809 } else {
1810 None
1811 }
1812 } else {
1813 None
1814 };
1815
1816 let resource_id = if let Some(resource_handle) = self.resource_handle {
1817 store.resource(resource_handle).unwrap().id()
1818 } else {
1819 None
1820 };
1821
1822 let mut have_id = false;
1823 if let Some(template) = &element_config.id {
1824 let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1825 let compiled_template = self.template_engine.template(template.as_str());
1826 let id = compiled_template.render(&context).to_string().map_err(|e|
1827 XmlConversionError::TemplateError(
1828 format!(
1829 "whilst rendering id template '{}' for node '{}'",
1830 template,
1831 node.tag_name().name(),
1832 ),
1833 Some(e),
1834 )
1835 )?;
1836 if !id.is_empty() {
1837 builder = builder.with_id(id);
1838 have_id = true;
1839 }
1840 }
1841
1842 if !have_id {
1843 if let Some(resource_id) = resource_id {
1845 builder = builder.with_id(stam::generate_id(&format!("{}-",resource_id), ""));
1846 } else {
1847 builder = builder.with_id(stam::generate_id("", ""));
1848 }
1849 }
1850
1851 builder = self.add_annotationdata_to_builder(element_config.annotationdata.iter(), builder, node.clone(), begin, end, resource_id, inputfile, doc_num)?;
1852
1853
1854 if self.config.provenance && inputfile.is_some() {
1855 let path_string = if let Some(id) = node.attribute((NS_XML,"id")) {
1856 format!("//{}[@xml:id=\"{}\"]", self.get_node_name_for_xpath(&node), id)
1858 } else {
1859 path.format_as_xpath(&self.prefixes)
1861 };
1862 let databuilder = AnnotationDataBuilder::new().with_dataset(CONTEXT_ANNO.into()).with_key("target".into()).with_value(
1863 BTreeMap::from([
1864 ("source".to_string(),inputfile.unwrap().into()),
1865 ("selector".to_string(),
1866 BTreeMap::from([
1867 ("type".to_string(),"XPathSelector".into()),
1868 ("value".to_string(),path_string.into())
1869 ]).into()
1870 )
1871 ]).into()
1872 );
1873 builder = builder.with_data_builder(databuilder);
1874 }
1875
1876
1877 match element_config.annotation {
1879 XmlAnnotationHandling::TextSelector => {
1880 if let Some(selector) = self.textselector(node, doc_num, PositionType::Body) {
1882 builder = builder.with_target(selector);
1883 if self.config.debug {
1884 eprintln!("[STAM fromxml] builder AnnotateText: {:?}", builder);
1885 }
1886 store.annotate(builder)?;
1887 }
1888 if !element_config.annotatetextprefix.is_empty() || !element_config.annotatetextsuffix.is_empty() {
1889 self.annotate_textaffixes(node, element_config, inputfile, doc_num, store)?;
1890 }
1891 }
1892 XmlAnnotationHandling::ResourceSelector => {
1893 builder = builder.with_target(SelectorBuilder::ResourceSelector(
1895 self.resource_handle.into(),
1896 ));
1897 if self.config.debug {
1898 eprintln!("[STAM fromxml] builder AnnotateResource: {:?}", builder);
1899 }
1900 store.annotate(builder)?;
1901 }
1902 XmlAnnotationHandling::TextSelectorBetweenMarkers => {
1903 if let Some(selector) =
1905 self.textselector_for_markers(node, doc_num, store, element_config)
1906 {
1907 builder = builder.with_target(selector);
1908 if self.config.debug {
1909 eprintln!(
1910 "[STAM fromxml] builder TextSelectorBetweenMarkers: {:?}",
1911 builder
1912 );
1913 }
1914 store.annotate(builder)?;
1915 if !element_config.annotatetextprefix.is_empty() || !element_config.annotatetextsuffix.is_empty() {
1916 self.annotate_textaffixes(node, element_config, inputfile, doc_num, store)?;
1917 }
1918 }
1919 }
1920 _ => panic!(
1921 "Invalid annotationhandling: {:?}",
1922 element_config.annotation
1923 ),
1924 }
1925 }
1926
1927 if element_config.stop == Some(false) || element_config.stop.is_none() {
1929 for child in node.children() {
1930 if child.is_element() {
1931 self.debugindent.push_str(" ");
1932 let mut path = path.clone();
1933 let count = elder_siblings.count(&child);
1934 path.add(&child, Some(count));
1935 self.extract_element_annotation(child, &path, inputfile, doc_num, store)?;
1937 self.debugindent.pop();
1938 self.debugindent.pop();
1939 }
1940 }
1941 }
1942 } else {
1943 eprintln!(
1944 "[STAM fromxml]{} WARNING: no match, skipping annotation extraction for element {}",
1945 self.debugindent,
1946 path
1947 );
1948 }
1949 Ok(())
1950 }
1951
1952 fn add_annotationdata_to_builder<'input>(&self, iter: impl Iterator<Item = &'a XmlAnnotationDataConfig>,
1953 mut builder: AnnotationBuilder<'a>,
1954 node: Node<'a, 'input>,
1955 begin: Option<usize>,
1956 end: Option<usize>,
1957 resource_id: Option<&str>,
1958 inputfile: Option<&str>,
1959 doc_num: usize,
1960 ) -> Result<AnnotationBuilder<'a>, XmlConversionError> {
1961 for annotationdata in iter {
1962 let mut databuilder = AnnotationDataBuilder::new();
1963 if let Some(template) = &annotationdata.set {
1964 let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1965 let compiled_template = self.template_engine.template(template.as_str());
1966 let dataset = compiled_template.render(&context).to_string().map_err(|e|
1967 XmlConversionError::TemplateError(
1968 format!(
1969 "whilst rendering annotationdata/dataset template '{}' for node '{}'",
1970 template,
1971 node.tag_name().name(),
1972 ),
1973 Some(e),
1974 )
1975 )?;
1976 if !dataset.is_empty() {
1977 databuilder = databuilder.with_dataset(dataset.into())
1978 }
1979 } else {
1980 databuilder =
1981 databuilder.with_dataset(self.config.default_set.as_str().into());
1982 }
1983 if let Some(template) = &annotationdata.key {
1984 let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
1985 let compiled_template = self.template_engine.template(template.as_str());
1986 match compiled_template.render(&context).to_string().map_err(|e|
1987 XmlConversionError::TemplateError(
1988 format!(
1989 "whilst rendering annotationdata/key template '{}' for node '{}'",
1990 template,
1991 node.tag_name().name(),
1992 ),
1993 Some(e),
1994 )
1995 ) {
1996 Ok(key) if !key.is_empty() =>
1997 databuilder = databuilder.with_key(key.into()) ,
1998 Ok(_) if !annotationdata.skip_if_missing => {
1999 return Err(XmlConversionError::TemplateError(
2000 format!(
2001 "whilst rendering annotationdata/key template '{}' for node '{}'",
2002 template,
2003 node.tag_name().name(),
2004 ),
2005 None
2006 ));
2007 },
2008 Err(e) if !annotationdata.skip_if_missing => {
2009 return Err(e)
2010 },
2011 _ => {
2012 continue
2014 }
2015 }
2016 }
2017 if let Some(value) = &annotationdata.value {
2018 match self.extract_value(value, node, annotationdata.allow_empty_value, annotationdata.skip_if_missing, annotationdata.valuetype.as_ref().map(|s| s.as_str()), begin, end, resource_id, inputfile, doc_num)? {
2019 Some(DataValue::List(values)) if annotationdata.multiple => {
2020 for value in values {
2021 let mut databuilder_multi = databuilder.clone();
2022 databuilder_multi = databuilder_multi.with_value(value);
2023 builder = builder.with_data_builder(databuilder_multi);
2024 }
2025 },
2026 Some(value) => {
2027 databuilder = databuilder.with_value(value);
2028 },
2029 None => {
2030 continue
2032 }
2033 }
2034 }
2035 if !annotationdata.multiple {
2036 builder = builder.with_data_builder(databuilder);
2037 }
2038 }
2039 Ok(builder)
2040 }
2041
2042 fn annotate_textaffixes<'b>(
2044 &mut self,
2045 node: Node<'a,'b>,
2046 element_config: &XmlElementConfig,
2047 inputfile: Option<&str>,
2048 doc_num: usize,
2049 store: &mut AnnotationStore,
2050 ) -> Result<(), XmlConversionError> {
2051
2052
2053 if !element_config.annotatetextprefix.is_empty() {
2054 let mut builder = AnnotationBuilder::new().with_id(stam::generate_id("textprefix-", ""));
2055 if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), PositionType::TextPrefix)) {
2056 let begin = if let Cursor::BeginAligned(begin) = offset.begin {
2057 Some(begin)
2058 } else {
2059 None
2060 };
2061 let end = if let Cursor::BeginAligned(end) = offset.end {
2062 Some(end)
2063 } else {
2064 None
2065 };
2066 builder = self.add_annotationdata_to_builder(element_config.annotatetextprefix.iter(), builder, node.clone(), begin,end, None, inputfile, doc_num)?; if let Some(selector) = self.textselector(node, doc_num, PositionType::TextPrefix) {
2068 builder = builder.with_target(selector);
2069 if self.config.debug {
2070 eprintln!("[STAM fromxml] builder AnnotateText: {:?}", builder);
2071 }
2072 store.annotate(builder)?;
2073 } else {
2074 return Err(XmlConversionError::ConfigError("Failed to create textselector to target textprefix".into()));
2075 }
2076 }
2077 }
2078
2079 if !element_config.annotatetextsuffix.is_empty() {
2080 let mut builder = AnnotationBuilder::new().with_id(stam::generate_id("textsuffix-", ""));
2081 if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), PositionType::TextSuffix)) {
2082 let begin = if let Cursor::BeginAligned(begin) = offset.begin {
2083 Some(begin)
2084 } else {
2085 None
2086 };
2087 let end = if let Cursor::BeginAligned(end) = offset.end {
2088 Some(end)
2089 } else {
2090 None
2091 };
2092 builder = self.add_annotationdata_to_builder(element_config.annotatetextsuffix.iter(), builder, node.clone(), begin,end, None, inputfile, doc_num)?; if let Some(selector) = self.textselector(node, doc_num, PositionType::TextSuffix) {
2094 builder = builder.with_target(selector);
2095 if self.config.debug {
2096 eprintln!("[STAM fromxml] builder AnnotateText: {:?}", builder);
2097 }
2098 store.annotate(builder)?;
2099 } else {
2100 return Err(XmlConversionError::ConfigError("Failed to create textselector to target textprefix".into()));
2101 }
2102 }
2103 }
2104 Ok(())
2105 }
2106
2107 fn extract_value<'b>(&self, value: &'a toml::Value, node: Node<'a,'b>, allow_empty_value: bool, skip_if_missing: bool, valuetype: Option<&str>, begin: Option<usize>, end: Option<usize>, resource_id: Option<&str>, inputfile: Option<&str>, doc_num: usize) -> Result<Option<DataValue>, XmlConversionError>{
2109 match value {
2110 toml::Value::String(template) => {
2111 let context = self.context_for_node(&node, begin, end, template.as_str(), resource_id, inputfile, doc_num);
2112 let compiled_template = self.template_engine.template(template.as_str()); match compiled_template.render(&context).to_string().map_err(|e|
2124 XmlConversionError::TemplateError(
2125 format!(
2126 "whilst rendering annotationdata/map template '{}' for node '{}'.{}",
2127 template,
2128 node.tag_name().name(),
2129 if self.config.debug() {
2130 format!("\nContext was {:?}.\nVariables are: {:?}", context, self.variables.get(template))
2131 } else {
2132 String::new()
2133 }
2134 ),
2135 Some(e),
2136 )
2137 ) {
2138 Ok(value) => {
2139 if !value.is_empty() || allow_empty_value {
2140 string_to_datavalue(value, valuetype).map(|v| Some(v))
2141 } else {
2142 Ok(None)
2144 }
2145 },
2146 Err(e) if !skip_if_missing => {
2147 Err(e)
2148 },
2149 Err(_) if allow_empty_value => {
2150 Ok(Some("".into()))
2151 },
2152 Err(_) => {
2153 Ok(None)
2155 }
2156 }
2157 },
2158 toml::Value::Table(map) => {
2159 let mut resultmap: BTreeMap<String,DataValue> = BTreeMap::new();
2160 for (key, value) in map.iter() {
2161 if let Some(value) = self.extract_value(value, node, false, true, None, begin, end, resource_id, inputfile, doc_num)? {
2162 resultmap.insert(key.clone(), value);
2163 }
2164 }
2165 Ok(Some(resultmap.into()))
2166 },
2167 toml::Value::Array(list) => {
2168 let mut resultlist: Vec<DataValue> = Vec::new();
2169 for value in list.iter() {
2170 if let Some(value) = self.extract_value(value, node, false, true, None, begin, end, resource_id, inputfile, doc_num)? {
2171 resultlist.push(value);
2172 }
2173 }
2174 Ok(Some(resultlist.into()))
2175 }
2176 toml::Value::Boolean(v) => Ok(Some(DataValue::Bool(*v))),
2177 toml::Value::Float(v) => Ok(Some(DataValue::Float(*v))),
2178 toml::Value::Integer(v) => Ok(Some(DataValue::Int(*v as isize))),
2179 toml::Value::Datetime(_v) => {
2180 todo!("fromxml: Datetime conversion not implemented yet");
2181 }
2182 }
2183 }
2184
2185 fn extract_value_metadata<'b>(&self, value: &'a toml::Value, context: &upon::Value, allow_empty_value: bool, skip_if_missing: bool, resource_id: Option<&str>) -> Result<Option<DataValue>, XmlConversionError>{
2187 match value {
2188 toml::Value::String(template) => {
2189 let compiled_template = self.template_engine.template(template.as_str()); match compiled_template.render(&context).to_string().map_err(|e|
2191 XmlConversionError::TemplateError(
2192 format!(
2193 "whilst rendering annotationdata/metadata template '{}' for metadata",
2194 template,
2195 ),
2196 Some(e),
2197 )
2198 ) {
2199 Ok(value) => {
2200 if !value.is_empty() || allow_empty_value {
2201 Ok(Some(value.into()))
2202 } else {
2203 Ok(None)
2205 }
2206 },
2207 Err(e) if !skip_if_missing => {
2208 Err(e)
2209 },
2210 Err(_) if allow_empty_value => {
2211 Ok(Some("".into()))
2212 },
2213 Err(_) => {
2214 Ok(None)
2216 }
2217 }
2218 },
2219 toml::Value::Table(map) => {
2220 let mut resultmap: BTreeMap<String,DataValue> = BTreeMap::new();
2221 for (key, value) in map.iter() {
2222 if let Some(value) = self.extract_value_metadata(value, context, false, true, resource_id)? {
2223 resultmap.insert(key.clone(), value);
2224 }
2225 }
2226 Ok(Some(resultmap.into()))
2227 },
2228 toml::Value::Array(list) => {
2229 let mut resultlist: Vec<DataValue> = Vec::new();
2230 for value in list.iter() {
2231 if let Some(value) = self.extract_value_metadata(value, context, false, true, resource_id)? {
2232 resultlist.push(value);
2233 }
2234 }
2235 Ok(Some(resultlist.into()))
2236 }
2237 toml::Value::Boolean(v) => Ok(Some(DataValue::Bool(*v))),
2238 toml::Value::Float(v) => Ok(Some(DataValue::Float(*v))),
2239 toml::Value::Integer(v) => Ok(Some(DataValue::Int(*v as isize))),
2240 toml::Value::Datetime(_v) => {
2241 todo!("fromxml: Datetime conversion not implemented yet");
2242 }
2243 }
2244 }
2245
2246 fn textselector<'s>(&'s self, node: Node, doc_num: usize, positiontype: PositionType) -> Option<SelectorBuilder<'s>> {
2248 let res_handle = self.resource_handle.expect("resource must be associated");
2249 if let Some(offset) = self.positionmap.get(&(doc_num, node.id(), positiontype)) {
2250 Some(SelectorBuilder::TextSelector(
2251 BuildItem::Handle(res_handle),
2252 offset.clone(),
2253 ))
2254 } else {
2255 None
2256 }
2257 }
2258
2259 fn textselector_for_markers<'b>(
2261 &self,
2262 node: Node,
2263 doc_num: usize,
2264 store: &AnnotationStore,
2265 element_config: &'b XmlElementConfig,
2266 ) -> Option<SelectorBuilder<'b>> {
2267 let resource = store
2268 .resource(
2269 self.resource_handle
2270 .expect("resource must have been created"),
2271 )
2272 .expect("resource must exist");
2273 let mut end: Option<usize> = None;
2274 if let Some(markers) = self.markers.get(&element_config.hash()) {
2275 let mut grab = false;
2276 for (d_num, n_id) in markers.iter() {
2277 if grab {
2278 end = self.positionmap.get(&(*d_num, *n_id, PositionType::Body)).map(|offset| {
2280 offset
2281 .begin
2282 .try_into()
2283 .expect("begin cursor must be beginaligned")
2284 });
2285 break;
2286 }
2287 if doc_num == *d_num && *n_id == node.id() {
2288 grab = true;
2290 }
2291 }
2292 };
2293 if end.is_none() {
2294 if let Some(scope) = element_config.marker_scope.as_deref() {
2297 if let Some((d_num, n_id)) = self.scopes.get(scope) {
2298 end = self.positionmap.get(&(*d_num, *n_id, PositionType::Body)).map(|offset| {
2299 offset
2300 .end
2301 .try_into()
2302 .expect("end cursor must be beginaligned")
2303 });
2304 } else {
2305 eprintln!("WARNING: Undefined scope referenced in marker_scope: {}, no matching text with this `scope_id` in this document! Skipping last marker!", scope);
2306 return None;
2307 }
2308 } else {
2309 end = Some(resource.textlen());
2311 }
2312 }
2313 if let (Some(offset), Some(end)) = (self.positionmap.get(&(doc_num, node.id(), PositionType::Body)), end) {
2314 Some(SelectorBuilder::TextSelector(
2315 BuildItem::Handle(self.resource_handle.unwrap()),
2316 Offset::simple(
2317 offset
2318 .begin
2319 .try_into()
2320 .expect("begin cursor must be beginaligned"),
2321 end,
2322 ),
2323 ))
2324 } else {
2325 None
2326 }
2327 }
2328
2329 fn set_global_context(&mut self) {
2330 self.global_context
2331 .insert("context".into(), upon::Value::Map(self.config.context.iter().map(|(k,v)| (k.clone(), map_value(v))).collect()));
2332 self.global_context
2333 .insert("namespaces".into(), self.config.namespaces.clone().into());
2334 self.global_context
2335 .insert("default_set".into(), self.config.default_set.clone().into());
2336 }
2337
2338 fn render_template<'input, 't>(
2339 &self,
2340 template: &'t str,
2341 node: &Node<'a, 'input>,
2342 begin: Option<usize>,
2343 end: Option<usize>,
2344 resource: Option<&str>,
2345 inputfile: Option<&str>,
2346 doc_num: usize,
2347 ) -> Result<Cow<'t, str>, XmlConversionError> {
2348 if template.chars().any(|c| c == '{') {
2349 let compiled_template = self.template_engine.template(template);
2351 let context = self.context_for_node(&node, begin, end, template, resource, inputfile, doc_num);
2352 let result = compiled_template.render(context).to_string()?;
2353 Ok(Cow::Owned(result))
2354 } else {
2355 Ok(Cow::Borrowed(template))
2357 }
2358 }
2359
2360 fn context_for_node<'input>(
2361 &self,
2362 node: &Node<'a, 'input>,
2363 begin: Option<usize>,
2364 end: Option<usize>,
2365 template: &str,
2366 resource: Option<&str>,
2367 inputfile: Option<&str>,
2368 doc_num: usize,
2369 ) -> upon::Value {
2370 let mut context = self.global_context.clone();
2371 let length = if let (Some(begin), Some(end)) = (begin, end) {
2372 Some(end - begin)
2373 } else {
2374 None
2375 };
2376 context.insert("localname".into(), node.tag_name().name().into());
2377 context.insert("name".into(), self.get_node_name_for_template(node).into());
2379 if let Some(namespace) = node.tag_name().namespace() {
2380 context.insert("namespace".into(), namespace.into());
2382 }
2383
2384 if let Some(begin) = begin {
2386 context.insert("begin".into(), upon::Value::Integer(begin as i64));
2387 }
2388 if let Some(end) = end {
2389 context.insert("end".into(), upon::Value::Integer(end as i64));
2390 }
2391 if let Some(length) = length {
2392 context.insert("length".into(), upon::Value::Integer(length as i64));
2393 }
2394 if let Some(resource) = resource {
2395 context.insert("resource".into(), resource.into());
2397 }
2398 if let Some(inputfile) = inputfile {
2399 context.insert("inputfile".into(), inputfile.into());
2401 }
2402 context.insert("doc_num".into(), upon::Value::Integer(doc_num as i64));
2404
2405 if let Some(vars) = self.variables.get(template) {
2406 for var in vars {
2407 let mut encodedvar = String::new();
2408 if let Some(value) = self.context_for_var(node, var, &mut encodedvar, false) {
2409 if self.config.debug() {
2410 eprintln!(
2411 "[STAM fromxml] Set context variable for template '{}' for node '{}': {}={:?} (encodedvar={})",
2412 template,
2413 node.tag_name().name(),
2414 var,
2415 value,
2416 encodedvar
2417 );
2418 }
2419 if value != upon::Value::None {
2420 context.insert(encodedvar, value);
2421 }
2422 } else if self.config.debug() {
2423 eprintln!(
2424 "[STAM fromxml] Missed context variable for template '{}' for node '{}': {}",
2425 template,
2426 node.tag_name().name(),
2427 var
2428 );
2429 }
2430 }
2431 }
2432 upon::Value::Map(context)
2433 }
2434
2435 fn context_for_var<'input>(
2439 &self,
2440 node: &Node<'a, 'input>,
2441 var: &str,
2442 path: &mut String,
2443 mut return_all_matches: bool,
2444 ) -> Option<upon::Value> {
2445
2446 let first = path.is_empty();
2448
2449 let var = if var.starts_with("?.$$") {
2450 if first {
2451 path.push_str("?.ELEMENTS_");
2452 return_all_matches = true;
2453 if self.config.debug {
2454 eprintln!("[STAM fromxml] will return all matches for {}", var);
2455 }
2456 };
2457 &var[4..]
2458 } else if var.starts_with("?.$") {
2459 if first {
2460 path.push_str("?.ELEMENT_");
2461 };
2462 &var[3..]
2463 } else if var.starts_with("$$") {
2464 if first {
2465 path.push_str("ELEMENTS_");
2466 return_all_matches = true;
2467 if self.config.debug {
2468 eprintln!("[STAM fromxml] will return all matches for {}", var);
2469 }
2470 };
2471 &var[2..]
2472 } else if var.starts_with("$") {
2473 if first {
2474 path.push_str("ELEMENT_");
2475 };
2476 &var[1..]
2477 } else if var.starts_with("?.@") {
2478 if first {
2479 path.push_str("?.");
2480 };
2481 &var[2..]
2482 } else {
2483 var
2484 };
2485
2486 if !first && !var.is_empty() && !path.ends_with("ELEMENT_") && !path.ends_with("ELEMENTS_"){
2487 path.push_str("_IN_");
2488 }
2489
2490 let (component, remainder) = var.split_once("/").unwrap_or((var,""));
2492 if component.is_empty() {
2494 if first && !remainder.is_empty() {
2495 let mut n = node.clone();
2497 while let Some(parentnode) = n.parent_element() {
2499 n = parentnode;
2500 }
2501 let (rootcomponent, remainder) = remainder.split_once("/").unwrap_or((remainder,""));
2503 let (prefix, localname) = if let Some(pos) = rootcomponent.find(":") {
2504 (Some(&rootcomponent[0..pos]), &rootcomponent[pos+1..])
2505 } else {
2506 (None, rootcomponent)
2507 };
2508 if localname != n.tag_name().name() && localname != "*" {
2510 None
2511 } else {
2512 if let Some(prefix) = prefix {
2513 path.push_str(prefix);
2514 path.push_str("__");
2515 }
2516 path.push_str(localname);
2517 self.context_for_var(&n, remainder, path, return_all_matches)
2518 }
2519 } else {
2520 Some(recursive_text(node).into())
2523 }
2524 } else if component.starts_with("@"){
2525 if let Some(pos) = component.find(":") {
2526 let prefix = &component[1..pos];
2527 if let Some(ns) = self.config.namespaces.get(prefix) {
2528 let var = &component[pos+1..];
2529 path.push_str("ATTRIB_");
2530 path.push_str(prefix);
2531 path.push_str("__");
2532 path.push_str(var);
2533 Some(
2534 node.attribute((ns.as_str(),var)).into()
2535 )
2536 } else {
2537 None
2538 }
2539 } else {
2540 let var = &component[1..];
2541 path.push_str("ATTRIB_");
2542 path.push_str(var);
2543 Some(
2544 node.attribute(var).into()
2545 )
2546 }
2547 } else if component == ".." {
2548 if let Some(parentnode) = node.parent_element().as_ref() {
2549 path.push_str("PARENT");
2551 self.context_for_var(parentnode, remainder, path, return_all_matches)
2552 } else {
2553 None
2554 }
2555 } else if component == "." {
2556 path.push_str("THIS");
2557 if !remainder.is_empty() {
2558 self.context_for_var(node, remainder, path, return_all_matches)
2560 } else {
2561 Some(recursive_text(node).into())
2562 }
2563 } else {
2564 let (prefix, localname) = if let Some(pos) = component.find(":") {
2565 (Some(&component[0..pos]), &component[pos+1..])
2566 } else {
2567 (None, component)
2568 };
2569 let localname_with_condition = localname;
2570 let (localname, condition_str, condition) = self.extract_condition(localname_with_condition); let mut multiple_value_buffer: Vec<upon::Value> = Vec::new(); let mut final_path: String = String::new(); for child in node.children() {
2575 if child.is_element() {
2576 let namedata = child.tag_name();
2577 let mut child_matches = if let Some(namespace) = namedata.namespace() {
2578 if let Some(foundprefix) = self.prefixes.get(namespace) {
2579 Some(foundprefix.as_str()) == prefix && localname == namedata.name()
2580 } else {
2581 false
2582 }
2583 } else {
2584 namedata.name() == localname
2585 };
2586 if child_matches {
2587 if let Some((attribname, negate, attribvalue)) = condition {
2589 if let Some(pos) = attribname.find(":") {
2591 let prefix = &attribname[0..pos];
2592 if let Some(ns) = self.config.namespaces.get(prefix) {
2593 let attribname = &attribname[pos+1..];
2594 if let Some(value) = child.attribute((ns.as_str(),attribname)) {
2595 if !negate && attribvalue != Some(value) {
2596 child_matches = false;
2597 } else if negate && attribvalue == Some(value) {
2598 child_matches = false;
2599 }
2600 } else {
2601 child_matches = false;
2602 }
2603 } else {
2604 child_matches = false;
2605 }
2606 } else {
2607 if let Some(value) = child.attribute(attribname) {
2608 if !negate && attribvalue != Some(value) {
2609 child_matches = false;
2610 } else if negate && attribvalue == Some(value) {
2611 child_matches = false;
2612 }
2613 } else {
2614 child_matches = false;
2615 }
2616 }
2617 }
2618 if !child_matches && self.config.debug {
2619 eprintln!("[STAM fromxml] candidate node does not meet condition: {}", localname_with_condition);
2620 }
2621 }
2623 if child_matches {
2624 let prevpathlen = path.len();
2625 if let Some(prefix) = prefix {
2627 path.push_str(prefix);
2628 path.push_str("__");
2629 }
2630 path.push_str(localname);
2631 if condition.is_some() {
2632 let mut hasher = DefaultHasher::new();
2634 condition_str.hash(&mut hasher);
2635 let h = hasher.finish();
2636 path.push_str(&format!("_COND{}_", h));
2637 }
2638 if let Some(value) = self.context_for_var(&child, remainder, path, return_all_matches) {
2639 if return_all_matches {
2641 if let upon::Value::List(v) = value {
2642 multiple_value_buffer.extend(v.into_iter());
2643 } else {
2644 multiple_value_buffer.push(value);
2645 }
2646 if final_path.is_empty() {
2647 final_path = path.clone();
2648 }
2649 } else {
2651 return Some(value);
2653 }
2654 }
2655 path.truncate(prevpathlen);
2657 }
2658 }
2659 }
2660 if !multiple_value_buffer.is_empty() {
2661 if self.config.debug {
2663 eprintln!("[STAM fromxml] returning multiple matches of {} as list", var);
2664 }
2665 *path = final_path;
2667 Some(multiple_value_buffer.into())
2668 } else {
2669 if self.config.debug {
2671 eprintln!("[STAM fromxml] returning with no match found for {} in {}", var, node.tag_name().name());
2672 }
2673 None
2674 }
2675 }
2676 }
2677
2678 fn extract_condition<'b>(&self, localname: &'b str) -> (&'b str, &'b str, Option<(&'b str, bool, Option<&'b str>)>) { if localname.ends_with("]") {
2681 if let Some(pos) = localname.find("[") {
2682 let condition = &localname[pos+1..localname.len()-1];
2683 let (mut attrib, negation, attribvalue) = if let Some(pos) = condition.find("=") {
2684 let attrib = condition[0..pos].trim();
2685 let value = condition[pos+1..].trim();
2686 let value = &value[1..value.len() - 1]; if attrib.ends_with('!') {
2688 (attrib[..attrib.len() - 1].trim(), true, Some(value))
2690 } else {
2691 (attrib.trim(), false, Some(value))
2692 }
2693 } else {
2694 (condition, false, None)
2695 };
2696 if attrib.starts_with('@') {
2697 attrib = &attrib[1..];
2699 }
2700 return (&localname[..pos], condition, Some((attrib, negation,attribvalue )) );
2701 }
2702 }
2703 (localname, "", None)
2704 }
2705
2706
2707 fn get_node_name_for_template<'b>(&self, node: &'b Node) -> Cow<'b,str> {
2708 let extended_name = node.tag_name();
2709 match (extended_name.namespace(), extended_name.name()) {
2710 (Some(namespace), tagname) => {
2711 if let Some(prefix) = self.prefixes.get(namespace) {
2712 Cow::Owned(format!("{}__{}", prefix, tagname))
2713 } else {
2714 Cow::Borrowed(tagname)
2715 }
2716 }
2717 (None, tagname) => Cow::Borrowed(tagname),
2718 }
2719 }
2720
2721 fn get_node_name_for_xpath<'b>(&self, node: &'b Node) -> Cow<'b,str> {
2722 let extended_name = node.tag_name();
2723 match (extended_name.namespace(), extended_name.name()) {
2724 (Some(namespace), tagname) => {
2725 if let Some(prefix) = self.prefixes.get(namespace) {
2726 Cow::Owned(format!("{}:{}", prefix, tagname))
2727 } else {
2728 Cow::Borrowed(tagname)
2729 }
2730 }
2731 (None, tagname) => Cow::Borrowed(tagname),
2732 }
2733 }
2734
2735
2736 fn precompile(&mut self, template: &'a str) -> Cow<'a,str> {
2737 let mut replacement = String::new();
2738 let mut variables: BTreeSet<&'a str> = BTreeSet::new();
2739 let mut begin = 0;
2740 let mut end = 0;
2741 for i in 0..template.len() {
2742 let slice = &template[i..];
2743 if slice.starts_with("{{") || slice.starts_with("{%") {
2744 begin = i;
2745 } else if slice.starts_with("}}") || slice.starts_with("%}") {
2746 if end < begin+2 {
2747 replacement.push_str(&template[end..begin+2]);
2748 }
2749 let inner = &template[begin+2..i]; replacement.push_str(&self.precompile_inblock(inner, &mut variables));
2751 end = i;
2752 }
2753 }
2754 if end > 0 {
2755 replacement.push_str(&template[end..]);
2756 }
2757 self.variables.insert(template.into(), variables);
2758 if !replacement.is_empty() {
2761 Cow::Owned(replacement)
2762 } else {
2763 Cow::Borrowed(template)
2764 }
2765 }
2766
2767 fn precompile_inblock<'s>(&self, s: &'s str, vars: &mut BTreeSet<&'s str>) -> Cow<'s,str> {
2768 let mut quoted = false;
2769 let mut var = false;
2770 let mut begin = 0;
2771 let mut end = 0;
2772 let mut replacement = String::new();
2773 let mut in_condition = false;
2774 for (i,c) in s.char_indices() {
2775 if in_condition && c != ']' {
2776 continue;
2777 }
2778 if c == '"' {
2779 quoted = !quoted;
2780 } else if !quoted {
2781 if !var && (c == '@' || c == '$') {
2782 var = true;
2784 begin = i;
2785 } else if var && c == '[' {
2786 in_condition = true;
2787 } else if var && in_condition && c == ']' {
2788 in_condition = false;
2790 } else if var && in_condition {
2791 continue;
2793 } else if var && (!c.is_alphanumeric() && c != '$' && c != '.' && c != '/' && c != '_' && c != ':' && c != '@') {
2794 if end < begin {
2796 replacement.push_str(&s[end..begin]);
2797 }
2798 let varname = &s[begin..i];
2799 vars.insert(varname);
2800 let replacement_var = self.precompile_name(varname);
2801 replacement += &replacement_var;
2802 end = i;
2803 var = false;
2804 }
2805 }
2806 }
2807 if end > 0 {
2808 replacement.push_str(&s[end..]);
2809 }
2810 if var {
2811 let varname = &s[begin..];
2813 vars.insert(varname);
2814 let replacement_var = self.precompile_name(varname);
2815 replacement += &replacement_var;
2816 }
2817 if !replacement.is_empty() {
2818 Cow::Owned(replacement)
2820 } else {
2821 Cow::Borrowed(s)
2822 }
2823 }
2824
2825 fn precompile_name(&self, s: &str) -> String {
2827 let mut replacement = String::new();
2828 let mut begincondition = None;
2829 let mut skip = 0;
2830 for (i,c) in s.char_indices() {
2831 if begincondition.is_some() && c != ']' {
2832 continue;
2833 } else if skip > 0 {
2834 skip -= 1;
2835 continue;
2836 }
2837 if c == '$' {
2838 let slice = &s[i..];
2839 if slice.starts_with("$$..") {
2840 replacement.push_str("ELEMENTS_PARENT");
2841 skip = 3;
2842 } else if slice.starts_with("$$.") {
2843 replacement.push_str("ELEMENTS_THIS");
2844 skip = 2;
2845 } else if slice.starts_with("$$/") {
2846 replacement.push_str("ELEMENTS_");
2847 skip = 2;
2848 } else if slice.starts_with("$$") {
2849 replacement.push_str("ELEMENTS_");
2850 skip = 1;
2851 } else if slice.starts_with("$..") {
2852 replacement.push_str("ELEMENT_PARENT");
2853 skip = 2;
2854 } else if slice.starts_with("$.") {
2855 replacement.push_str("ELEMENT_THIS");
2856 skip = 1;
2857 } else if slice.starts_with("$/") {
2858 replacement.push_str("ELEMENT_");
2859 skip = 1;
2860 } else {
2861 replacement.push_str("ELEMENT_");
2862 }
2863 } else if c == '@' {
2864 replacement.push_str("ATTRIB_");
2865 } else if c == '/' {
2866 replacement.push_str("_IN_");
2867 } else if c == ':' {
2868 replacement.push_str("__");
2869 } else if c == '[' {
2870 begincondition = Some(i+1);
2871 } else if c == ']' {
2872 if let Some(begin) = begincondition {
2874 let mut hasher = DefaultHasher::new();
2875 let _ = &s[begin..i].hash(&mut hasher);
2876 let h = hasher.finish();
2877 replacement.push_str(&format!("_COND{}_", h));
2878 }
2879 begincondition = None;
2880 } else {
2881 replacement.push(c);
2882 }
2883 }
2884 replacement
2886 }
2887
2888 fn add_metadata(&self, store: &mut AnnotationStore) -> Result<(), XmlConversionError> {
2889 for metadata in self.config.metadata.iter() {
2890 let mut builder = AnnotationBuilder::new();
2891
2892 let resource_id = if let Some(resource_handle) = self.resource_handle {
2893 store.resource(resource_handle).unwrap().id()
2894 } else {
2895 None
2896 };
2897
2898 let mut context = self.global_context.clone();
2899 if let Some(resource_id) = resource_id {
2900 context.insert("resource".into(), resource_id.into());
2901 }
2902
2903 if let Some(template) = &metadata.id {
2904 let compiled_template = self.template_engine.template(template.as_str());
2905 let id = compiled_template.render(&context).to_string().map_err(|e|
2906 XmlConversionError::TemplateError(
2907 format!(
2908 "whilst rendering metadata id template '{}'",
2909 template,
2910 ),
2911 Some(e),
2912 )
2913 )?;
2914 if !id.is_empty() {
2915 builder = builder.with_id(id);
2916 }
2917 }
2918
2919 for annotationdata in metadata.annotationdata.iter() {
2920 let mut databuilder = AnnotationDataBuilder::new();
2921 if let Some(template) = &annotationdata.set {
2922 let compiled_template = self.template_engine.template(template.as_str());
2923 let dataset = compiled_template.render(&context).to_string().map_err(|e|
2924 XmlConversionError::TemplateError(
2925 format!(
2926 "whilst rendering annotationdata/dataset template '{}' for metadata",
2927 template,
2928 ),
2929 Some(e),
2930 )
2931 )?;
2932 if !dataset.is_empty() {
2933 databuilder = databuilder.with_dataset(dataset.into())
2934 }
2935 } else {
2936 databuilder =
2937 databuilder.with_dataset(self.config.default_set.as_str().into());
2938 }
2939 if let Some(template) = &annotationdata.key {
2940 let compiled_template = self.template_engine.template(template.as_str());
2941 match compiled_template.render(&context).to_string().map_err(|e|
2942 XmlConversionError::TemplateError(
2943 format!(
2944 "whilst rendering annotationdata/key template '{}' for metadata",
2945 template,
2946 ),
2947 Some(e),
2948 )
2949 ) {
2950 Ok(key) if !key.is_empty() =>
2951 databuilder = databuilder.with_key(key.into()) ,
2952 Ok(_) if !annotationdata.skip_if_missing => {
2953 return Err(XmlConversionError::TemplateError(
2954 format!(
2955 "whilst rendering annotationdata/key template '{}' metadata",
2956 template,
2957 ),
2958 None
2959 ));
2960 },
2961 Err(e) if !annotationdata.skip_if_missing => {
2962 return Err(e)
2963 },
2964 _ => {
2965 continue
2967 }
2968 }
2969 }
2970 if let Some(value) = &annotationdata.value {
2971 match self.extract_value_metadata(value, &upon::Value::Map(context.clone()), annotationdata.allow_empty_value, annotationdata.skip_if_missing, resource_id.as_deref())? {
2972 Some(value) => {
2973 databuilder = databuilder.with_value(value);
2974 },
2975 None => {
2976 continue
2978 }
2979 }
2980 }
2981 builder = builder.with_data_builder(databuilder);
2982 }
2983
2984
2985
2986 match metadata.annotation {
2988 XmlAnnotationHandling::TextSelector => {
2989 builder = builder.with_target(SelectorBuilder::TextSelector(BuildItem::Handle(self.resource_handle.expect("resource must have handle")), Offset::whole()));
2991 if self.config.debug {
2992 eprintln!("[STAM fromxml] builder AnnotateText: {:?}", builder);
2993 }
2994 store.annotate(builder)?;
2995 }
2996 XmlAnnotationHandling::ResourceSelector | XmlAnnotationHandling::None | XmlAnnotationHandling::Unspecified => {
2997 builder = builder.with_target(SelectorBuilder::ResourceSelector(
2999 self.resource_handle.into(),
3000 ));
3001 if self.config.debug {
3002 eprintln!("[STAM fromxml] builder AnnotateResource: {:?}", builder);
3003 }
3004 store.annotate(builder)?;
3005 }
3006 _ => panic!(
3007 "Invalid annotationhandling for metadata: {:?}",
3008 metadata.annotation
3009 ),
3010 }
3011 }
3012 Ok(())
3013 }
3014}
3015
3016
3017
3018fn recursive_text(node: &Node) -> String {
3020 let mut s = String::new();
3021 for child in node.children() {
3022 if child.is_text() {
3023 s += child.text().expect("should have text");
3024 } else if child.is_element() {
3025 s += &recursive_text(&child);
3026 }
3027 }
3028 s
3029}
3030
/// Returns `s` with its first character converted to uppercase; the rest is left as-is.
///
/// The uppercase form may consist of several characters (e.g. `ß` becomes `SS`).
fn filter_capitalize(s: &str) -> String {
    let mut rest = s.chars();
    let mut capitalized = String::with_capacity(s.len());
    if let Some(initial) = rest.next() {
        capitalized.extend(initial.to_uppercase());
        capitalized.push_str(rest.as_str());
    }
    capitalized
}
3043
3044fn filter_gt(a: &upon::Value, b: &upon::Value) -> bool {
3045 match (a, b) {
3046 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a > *b,
3047 (upon::Value::Float(a), upon::Value::Float(b)) => *a > *b,
3048 (upon::Value::String(a), upon::Value::String(b)) => *a > *b,
3049 _ => false,
3050 }
3051}
3052
3053fn filter_lt(a: &upon::Value, b: &upon::Value) -> bool {
3054 match (a, b) {
3055 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a < *b,
3056 (upon::Value::Float(a), upon::Value::Float(b)) => *a < *b,
3057 (upon::Value::String(a), upon::Value::String(b)) => *a < *b,
3058 _ => false,
3059 }
3060}
3061
3062fn filter_gte(a: &upon::Value, b: &upon::Value) -> bool {
3063 match (a, b) {
3064 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a >= *b,
3065 (upon::Value::Float(a), upon::Value::Float(b)) => *a >= *b,
3066 (upon::Value::String(a), upon::Value::String(b)) => *a >= *b,
3067 _ => false,
3068 }
3069}
3070
3071fn filter_lte(a: &upon::Value, b: &upon::Value) -> bool {
3072 match (a, b) {
3073 (upon::Value::Integer(a), upon::Value::Integer(b)) => *a <= *b,
3074 (upon::Value::Float(a), upon::Value::Float(b)) => *a <= *b,
3075 (upon::Value::String(a), upon::Value::String(b)) => *a <= *b,
3076 _ => false,
3077 }
3078}
3079
3080fn filter_add(a: &upon::Value, b: &upon::Value) -> upon::Value {
3081 match (a, b) {
3082 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a + b),
3083 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a + b),
3084 (upon::Value::String(a), upon::Value::String(b)) => upon::Value::String(a.clone() + b),
3085 _ => upon::Value::None,
3086 }
3087}
3088
3089fn filter_sub(a: &upon::Value, b: &upon::Value) -> upon::Value {
3090 match (a, b) {
3091 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a - b),
3092 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a - b),
3093 _ => upon::Value::None,
3094 }
3095}
3096
3097fn filter_mul(a: &upon::Value, b: &upon::Value) -> upon::Value {
3098 match (a, b) {
3099 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a * b),
3100 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a * b),
3101 _ => upon::Value::None,
3102 }
3103}
3104
3105fn filter_div(a: &upon::Value, b: &upon::Value) -> upon::Value {
3106 match (a, b) {
3107 (upon::Value::Integer(a), upon::Value::Integer(b)) => upon::Value::Integer(a / b),
3108 (upon::Value::Float(a), upon::Value::Float(b)) => upon::Value::Float(a / b),
3109 _ => upon::Value::None,
3110 }
3111}
3112
3113
3114fn map_value(value: &toml::Value) -> upon::Value {
3116 match value {
3117 toml::Value::String(s) => upon::Value::String(s.clone()),
3118 toml::Value::Integer(i) => upon::Value::Integer(*i),
3119 toml::Value::Float(i) => upon::Value::Float(*i),
3120 toml::Value::Boolean(v) => upon::Value::Bool(*v),
3121 toml::Value::Datetime(s) => upon::Value::String(s.to_string()),
3122 toml::Value::Array(v) => upon::Value::List(v.iter().map(|i| map_value(i)).collect()),
3123 toml::Value::Table(v) => upon::Value::Map(v.iter().map(|(k,i)| (k.clone(),map_value(i))).collect()),
3124 }
3125}
3126
3127#[inline]
3129fn string_to_datavalue(value: String, valuetype: Option<&str>) -> Result<DataValue,XmlConversionError> {
3130 match valuetype {
3131 Some("str") | Some("string") => Ok(DataValue::String(value)),
3132 Some("int") => {
3133 if let Ok(value) = value.parse::<isize>() {
3134 Ok(DataValue::Int(value))
3135 } else {
3136 Err(XmlConversionError::TemplateError(format!("Unable to interpret value as integer: {}", value), None))
3137 }
3138 },
3139 Some("float") => {
3140 if let Ok(value) = value.parse::<f64>() {
3141 Ok(DataValue::Float(value))
3142 } else {
3143 Err(XmlConversionError::TemplateError(format!("Unable to interpret value as integer: {}", value), None))
3144 }
3145 },
3146 Some("bool") => match value.as_str() {
3147 "yes" | "true" | "enabled" | "on" | "1" | "active" => Ok(DataValue::Bool(true)),
3148 _ => Ok(DataValue::Bool(false))
3149 },
3150 Some(x) => {
3151 Err(XmlConversionError::TemplateError(format!("Invalid valuetype: {}", x), None))
3152 }
3153 None => {
3154 if let Ok(value) = value.parse::<isize>() {
3156 Ok(DataValue::Int(value))
3157 } else if let Ok(value) = value.parse::<f64>() {
3158 Ok(DataValue::Float(value))
3159 } else if value.starts_with("(list) [ ") && value.ends_with(" ]") {
3160 if let Ok(serde_json::Value::Array(values)) = serde_json::from_str(&value[6..]) {
3162 Ok(DataValue::List(values.into_iter().map(|v| {
3163 match v {
3164 serde_json::Value::String(s) => DataValue::String(s),
3165 serde_json::Value::Number(n) => if let Some(n) = n.as_i64() {
3166 DataValue::Int(n as isize)
3167 } else if let Some(n) = n.as_f64() {
3168 DataValue::Float(n)
3169 } else {
3170 unreachable!("number should always be either int or float")
3171 },
3172 serde_json::Value::Bool(b) => DataValue::Bool(b),
3173 _ => DataValue::Null, }
3175 }).collect()))
3176 } else {
3177 Err(XmlConversionError::TemplateError(format!("Unable to deserialize list value: {}", value), None))
3178 }
3179 } else {
3180 Ok(value.into())
3181 }
3182 }
3183 }
3184}
3185
3186fn string_to_templatevalue(value: String) -> upon::Value {
3187 if let Ok(value) = value.parse::<i64>() {
3188 upon::Value::Integer(value)
3189 } else if let Ok(value) = value.parse::<f64>() {
3190 upon::Value::Float(value)
3191 } else {
3192 upon::Value::String(value)
3193 }
3194}
3195
3196fn value_formatter(f: &mut upon::fmt::Formatter<'_>, value: &upon::Value) -> upon::fmt::Result {
3199 match value {
3200 upon::Value::List(vs) => {
3201 f.write_str("(list) [ ")?;
3202 for (i, v) in vs.iter().enumerate() {
3203 if i > 0 {
3204 f.write_str(", ")?;
3205 }
3206 if let upon::Value::String(s) = v {
3207 write!(f, "\"{}\"", s.replace("\"","\\\"").replace("\n"," ").split_whitespace().collect::<Vec<_>>().join(" "))?;
3208 } else {
3209 upon::fmt::default(f, v)?;
3210 f.write_char('"')?;
3211 }
3212 }
3213 f.write_str(" ]")?;
3214 }
3215 v => upon::fmt::default(f, v)?, };
3217 Ok(())
3218}
3219
3220#[derive(Clone,Debug,Deserialize)]
3221struct ExternalFilter {
3222 name: String,
3224
3225 command: String,
3227
3228 args: Vec<String>
3230}
3231
3232impl ExternalFilter {
3233 fn run(&self, input_value: &upon::Value) -> upon::Value {
3235 let process = Command::new(self.command.as_str()).args(
3236 self.args.iter().map(|x| if x == "{{value}}" || x == "{{ value }}" || x == "$value" {
3238 match input_value {
3239 upon::Value::String(s) => s.clone(),
3240 upon::Value::Integer(d) => format!("{}",d),
3241 upon::Value::Float(d) => format!("{}",d),
3242 upon::Value::Bool(d) => format!("{}",d),
3243 upon::Value::None => String::new(),
3244 _ => panic!("Lists and maps are not supported to be passed as parameter to external filters yet!"),
3245 }
3246 } else {
3247 x.clone() })
3249 ).stdin(Stdio::piped()).stdout(Stdio::piped()).spawn();
3250
3251
3252 if let Ok(mut process) = process {
3253 {
3254 let mut outstdin = process.stdin.take().expect("unable to open stdin for external filter");
3255 let mut writer = BufWriter::new(&mut outstdin);
3256 match input_value {
3257 upon::Value::String(s) => writer.write(s.as_bytes()),
3258 upon::Value::Integer(d) => writer.write(format!("{}",d).as_bytes()),
3259 upon::Value::Float(d) => writer.write(format!("{}",d).as_bytes()),
3260 upon::Value::Bool(d) => writer.write(format!("{}",d).as_bytes()),
3261 upon::Value::None => writer.write(&[]),
3262 _ => panic!("Lists and maps are not supported to be passed as input to external filters yet!"),
3263 }.expect("Writing to stdin for external filter failed!");
3264 }
3266 let output = process.wait_with_output().expect("External filter wasn't running");
3267 if !output.status.success() {
3268 panic!("External filter {} failed ({:?})", self.name, output.status.code());
3269 }
3270 if let Ok(s) = String::from_utf8(output.stdout) {
3271 return string_to_templatevalue(s);
3272 } else {
3273 panic!("External filter {} produced invalid UTF-8!", self.name);
3274 }
3275 }
3276 panic!("External filter {} failed!", self.name);
3277 }
3278}
3279
3280#[cfg(test)]
3281mod tests {
3282 use super::*;
3283 const XMLSMALLEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3286<head><title>test</title></head><body><h1>TEST</h1><p xml:id="p1" n="001">This is a <em xml:id="emphasis" style="color:green">test</em>.</p></body></html>"#;
3287
3288 const XMLEXAMPLE: &'static str = r#"<!DOCTYPE entities[<!ENTITY nbsp " ">]>
3289<html xmlns="http://www.w3.org/1999/xhtml" xmlns:my="http://example.com">
3290<head>
3291 <title>Test</title>
3292 <meta name="author" content="proycon" />
3293</head>
3294<body>
3295 <h1>Header</h1>
3296
3297 <p xml:id="par1">
3298 <span xml:id="sen1">This is a sentence.</span>
3299 <span xml:id="sen2">This is the second sentence.</span>
3300 </p>
3301 <p xml:id="par2">
3302 <strong>This</strong> is the <em>second</em> paragraph.
3303 It has a <strong>bold</strong> word and one in <em>italics</em>.<br/>
3304 Let's highlight stress in the following word: <span my:stress="secondary">re</span>pu<span my:stress="primary">ta</span>tion.
3305 </p>
3306 <p xml:space="preserve"><![CDATA[This third
3307paragraph consists
3308of CDATA and is configured to preserve whitespace, and weird &entities; ]]></p>
3309
3310 <h2>Subsection</h2>
3311
3312 <p>
3313 Have some fruits:<br/>
3314 <ul xml:id="list1" class="fruits">
3315 <li xml:id="fruit1">apple</li>
3316 <li xml:id="fruit2">banana</li>
3317 <li xml:id="fruit3">melon</li>
3318 </ul>
3319 </p>
3320
3321 Some lingering text outside of any confines...
3322</body>
3323</html>"#;
3324
3325 const XMLEXAMPLE_TEXTOUTPUT: &'static str = "Header\n\nThis is a sentence. This is the second sentence.\n\nThis is the second paragraph. It has a bold word and one in italics.\nLet's highlight stress in the following word: reputation.\n\nThis third\nparagraph consists\nof CDATA and is configured to preserve whitespace, and weird &entities; \nSubsection\n\nHave some fruits:\n* apple\n* banana\n* melon\n\nSome lingering text outside of any confines...";
3326
3327 const XMLTEISPACE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3329<body><space dim="vertical" unit="lines" quantity="3" /></body></html>"#;
3330
3331 const CONF: &'static str = r#"#default whitespace handling (Collapse or Preserve)
3332whitespace = "Collapse"
3333default_set = "urn:stam-fromhtml"
3334
3335[namespaces]
3336#this defines the namespace prefixes you can use in this configuration
3337xml = "http://www.w3.org/XML/1998/namespace"
3338html = "http://www.w3.org/1999/xhtml"
3339xsd = "http://www.w3.org/2001/XMLSchema"
3340xlink = "http://www.w3.org/1999/xlink"
3341
3342# elements and attributes are matched in reverse-order, so put more generic statements before more specific ones
3343
3344#Define some base elements that we reuse later for actual elements (prevents unnecessary repetition)
3345[baseelements.common]
3346id = "{% if ?.@xml:id %}{{ @xml:id }}{% endif %}"
3347
3348 [[baseelements.common.annotationdata]]
3349 key = "type"
3350 value = "{{ localname }}"
3351
3352 [[baseelements.common.annotationdata]]
3353 key = "lang"
3354 value = "{{ @xml:lang }}"
3355 skip_if_missing = true
3356
3357 [[baseelements.common.annotationdata]]
3358 key = "n"
3359 value = "{{ @n }}"
3360 skip_if_missing = true
3361 valuetype = "int"
3362
3363 [[baseelements.common.annotationdata]]
3364 key = "nstring"
3365 value = "{{ @n }}"
3366 skip_if_missing = true
3367 valuetype = "string"
3368
3369 [[baseelements.common.annotationdata]]
3370 key = "style"
3371 value = "{{ @style }}"
3372 skip_if_missing = true
3373
3374 [[baseelements.common.annotationdata]]
3375 key = "class"
3376 value = "{{ @class }}"
3377 skip_if_missing = true
3378
3379 [[baseelements.common.annotationdata]]
3380 key = "src"
3381 value = "{{ @src }}"
3382 skip_if_missing = true
3383
3384[baseelements.text]
3385text = true
3386
3387
3388[[elements]]
3389base = [ "text", "common" ]
3390path = "*"
3391text = true
3392annotation = "TextSelector"
3393
3394# Pass through the following elements without mapping to text
3395[[elements]]
3396base = [ "common" ]
3397path = "//html:head"
3398
3399[[elements]]
3400base = [ "common" ]
3401path = "//html:head//*"
3402
3403# Map metadata like <meta name="key" content="value"> to annotations with key->value data selecting the resource (ResourceSelector)
3404[[elements]]
3405base = [ "common" ]
3406path = "//html:head//html:meta"
3407
3408[[elements.annotationdata]]
3409key = "{% if ?.@name %}{{ name }}{% endif %}"
3410value = "{% if ?.@content %}{{ @content }}{% endif %}"
3411skip_if_missing = true
3412
3413# By default, ignore any tags in the head (unless they're mentioned specifically later in the config)
3414[[elements]]
3415path = "//html:head/html:title"
3416annotation = "ResourceSelector"
3417
3418[[elements.annotationdata]]
3419key = "title"
3420value = "{{ $. | trim }}"
3421
3422
3423# Determine how various structural elements are converted to text
3424
3425[[elements]]
3426base = [ "common" ]
3427path = "//html:br"
3428textsuffix = "\n"
3429
3430[[elements]]
3431base = [ "common", "text" ]
3432path = "//html:p"
3433textprefix = "\n"
3434textsuffix = "\n"
3435annotation = "TextSelector"
3436
3437# Let's do headers and bulleted lists like markdown
3438[[elements]]
3439base = [ "common", "text" ]
3440path = "//html:h1"
3441textsuffix = "\n"
3442annotation = "TextSelector"
3443id = "h1"
3444
3445[[elements]]
3446base = [ "common", "text" ]
3447path = "//html:body//html:h2"
3448textsuffix = "\n"
3449annotation = "TextSelector"
3450id = "h2"
3451
3452#Generic, will be overriden by more specific one
3453[[elements]]
3454base = [ "common", "text" ]
3455path = "//html:li"
3456textprefix = "- "
3457textsuffix = "\n"
3458
3459[[elements]]
3460base = [ "common", "text" ]
3461path = """//html:body"""
3462annotation = "TextSelector"
3463id = "body"
3464
3465 [[elements.annotationdata]]
3466 key = "title_from_parent"
3467 value = "{{ $../html:head/html:title }}"
3468 skip_if_missing = true
3469
3470 [[elements.annotationdata]]
3471 key = "title_from_root"
3472 value = "{{ $/html:html/html:head/html:title }}"
3473 skip_if_missing = true
3474
3475 [[elements.annotationdata]]
3476 key = "firstfruit"
3477 value = """{{ $./html:p/html:ul/html:li }}"""
3478 skip_if_missing = true
3479
3480 [[elements.annotationdata]]
3481 key = "fruits"
3482 value = """{{ $$./html:p/html:ul/html:li }}"""
3483 skip_if_missing = true
3484
3485 [[elements.annotationdata]]
3486 key = "multifruits"
3487 value = """{{ $$./html:p/html:ul/html:li }}"""
3488 skip_if_missing = true
3489 multiple = true
3490
3491#More specific one takes precendence over the above generic one
3492[[elements]]
3493base = [ "common", "text" ]
3494path = """//html:ul[@class="fruits"]/html:li"""
3495textprefix = "* "
3496textsuffix = "\n"
3497
3498#Not real HTML, test-case modelled after TEI space
3499[[elements]]
3500base = [ "common" ]
3501path = """//html:space[@dim="vertical" and @unit="lines"]"""
3502text = true
3503textsuffix = """\n{% for x in @quantity | int | as_range %}\n{% endfor %}"""
3504
3505
3506[[elements]]
3507base = [ "common", "text" ]
3508path = "//html:example"
3509annotation = "TextSelector"
3510
3511[[elements.annotationdata]]
3512key = "requiredattrib"
3513value = "{{ @requiredattrib }}"
3514
3515[[elements.annotationdata]]
3516key = "optattrib"
3517value = "{{ ?.@optattrib }}"
3518
3519[[elements]]
3520base = [ "common","text" ]
3521path = "//html:marquee"
3522annotation = "TextSelector"
3523
3524#map value, some bogus data to test parsing
3525[[elements.annotationdata]]
3526key = "map"
3527
3528[elements.annotationdata.value]
3529text = "{{ $. }}"
3530number = 42
3531bogus = true
3532
3533[[metadata]]
3534id = "metadata"
3535
3536[[metadata.annotationdata]]
3537key = "author"
3538value = "proycon"
3539"#;
3540
3541 const XMLREQATTRIBEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3542<body><example xml:id="ann1" requiredattrib="blah">test</example></body></html>"#;
3543
3544 const XMLREQATTRIBEXAMPLE2: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3545<body><example xml:id="ann1">test</example></body></html>"#;
3546
3547 const XMLREQATTRIBEXAMPLE3: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3548<body><example xml:id="ann1" requiredattrib="blah" optattrib="blah">test</example></body></html>"#;
3549
3550 const XMLMAPEXAMPLE: &'static str = r#"<html xmlns="http://www.w3.org/1999/xhtml">
3551<body><marquee xml:id="ann1">test</marquee></body></html>"#;
3552
3553 #[test]
3554 fn test_precompile_template_nochange() -> Result<(), String> {
3555 let config = XmlConversionConfig::new();
3556 let mut conv = XmlToStamConverter::new(&config);
3557 let template_in = "{{ foo }}";
3558 let template_out = conv.precompile(template_in);
3559 assert_eq!( template_out, template_in);
3560 assert!(!conv.variables.get(template_in).as_ref().unwrap().contains("foo"));
3562 Ok(())
3563 }
3564
3565 #[test]
3566 fn test_precompile_template_attrib() -> Result<(), String> {
3567 let config = XmlConversionConfig::new();
3568 let mut conv = XmlToStamConverter::new(&config);
3569 let template_in = "{{ @foo }}";
3570 let template_out = conv.precompile(template_in);
3571 assert_eq!(template_out, "{{ ATTRIB_foo }}");
3572 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@foo"));
3574 Ok(())
3575 }
3576
3577 #[test]
3578 fn test_precompile_template_attrib_ns() -> Result<(), String> {
3579 let config = XmlConversionConfig::new();
3580 let mut conv = XmlToStamConverter::new(&config);
3581 let template_in = "{{ @bar:foo }}";
3582 let template_out = conv.precompile(template_in);
3583 assert_eq!(template_out, "{{ ATTRIB_bar__foo }}");
3584 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@bar:foo"));
3586 Ok(())
3587 }
3588
3589 #[test]
3590 fn test_precompile_template_element() -> Result<(), String> {
3591 let config = XmlConversionConfig::new();
3592 let mut conv = XmlToStamConverter::new(&config);
3593 let template_in = "{{ $foo }}";
3594 let template_out = conv.precompile(template_in);
3595 assert_eq!(template_out, "{{ ELEMENT_foo }}");
3596 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$foo"));
3598 Ok(())
3599 }
3600
3601 #[test]
3602 fn test_precompile_template_element_ns() -> Result<(), String> {
3603 let config = XmlConversionConfig::new();
3604 let mut conv = XmlToStamConverter::new(&config);
3605 let template_in = "{{ $bar:foo }}";
3606 let template_out = conv.precompile(template_in);
3607 assert_eq!(template_out, "{{ ELEMENT_bar__foo }}");
3608 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$bar:foo"));
3610 Ok(())
3611 }
3612
3613 #[test]
3614 fn test_precompile_template_this_text() -> Result<(), String> {
3615 let config = XmlConversionConfig::new();
3616 let mut conv = XmlToStamConverter::new(&config);
3617 let template_in = "{{ $. }}";
3618 let template_out = conv.precompile(template_in);
3619 assert_eq!(template_out, "{{ ELEMENT_THIS }}");
3620 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$."));
3621 Ok(())
3622 }
3623
3624 #[test]
3625 fn test_precompile_template_parent_text() -> Result<(), String> {
3626 let config = XmlConversionConfig::new();
3627 let mut conv = XmlToStamConverter::new(&config);
3628 let template_in = "{{ $.. }}";
3629 let template_out = conv.precompile(template_in);
3630 assert_eq!(template_out, "{{ ELEMENT_PARENT }}");
3631 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$.."));
3632 Ok(())
3633 }
3634
3635 #[test]
3636 fn test_precompile_template_elements() -> Result<(), String> {
3637 let config = XmlConversionConfig::new();
3638 let mut conv = XmlToStamConverter::new(&config);
3639 let template_in = "{{ $$foo }}";
3640 let template_out = conv.precompile(template_in);
3641 assert_eq!(template_out, "{{ ELEMENTS_foo }}");
3642 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$$foo"));
3643 Ok(())
3644 }
3645
3646 #[test]
3647 fn test_precompile_template_elements_ns() -> Result<(), String> {
3648 let config = XmlConversionConfig::new();
3649 let mut conv = XmlToStamConverter::new(&config);
3650 let template_in = "{{ $$bar:foo }}";
3651 let template_out = conv.precompile(template_in);
3652 assert_eq!(template_out, "{{ ELEMENTS_bar__foo }}");
3653 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$$bar:foo"));
3654 Ok(())
3655 }
3656
3657
3658 #[test]
3659 fn test_precompile_template_attrib2() -> Result<(), String> {
3660 let config = XmlConversionConfig::new();
3661 let mut conv = XmlToStamConverter::new(&config);
3662 let template_in = "{% for x in @foo %}";
3663 let template_out = conv.precompile(template_in);
3664 assert_eq!(template_out, "{% for x in ATTRIB_foo %}");
3665 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@foo"));
3667 Ok(())
3668 }
3669
3670 #[test]
3671 fn test_precompile_template_attrib3() -> Result<(), String> {
3672 let config = XmlConversionConfig::new();
3673 let mut conv = XmlToStamConverter::new(&config);
3674 let template_in = "{{ ?.@foo }}";
3675 let template_out = conv.precompile(template_in);
3676 assert_eq!(template_out, "{{ ?.ATTRIB_foo }}");
3677 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("@foo"));
3678 Ok(())
3679 }
3680
3681 #[test]
3682 fn test_precompile_template_path() -> Result<(), String> {
3683 let config = XmlConversionConfig::new();
3684 let mut conv = XmlToStamConverter::new(&config);
3685 let template_in = "{{ $x/y/z/@a }}";
3686 let template_out = conv.precompile(template_in);
3687 assert_eq!(template_out, "{{ ELEMENT_x_IN_y_IN_z_IN_ATTRIB_a }}");
3688 assert!(conv.variables.get(template_in).as_ref().unwrap().contains("$x/y/z/@a"));
3689 Ok(())
3690 }
3691
3692 #[test]
3693 fn test_loadconfig() -> Result<(), String> {
3694 let config = XmlConversionConfig::from_toml_str(CONF)?;
3695 let mut conv = XmlToStamConverter::new(&config);
3696 conv.compile().map_err(|e| format!("{}",e))?;
3697 assert_eq!(conv.config.namespaces.len(),4 , "number of namespaces");
3698 assert_eq!(conv.config.elements.len(), 15, "number of elements");
3699 assert_eq!(conv.config.baseelements.len(), 2, "number of baseelements");
3700 assert_eq!(conv.config.elements.get(0).unwrap().annotationdata.len(), 7,"number of annotationdata under first element");
3701 assert_eq!(conv.config.baseelements.get("common").unwrap().annotationdata.len(), 7,"number of annotationdata under baseelement common");
3702 Ok(())
3703 }
3704
3705 #[test]
3706 fn test_small() -> Result<(), String> {
3707 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3708 let mut store = stam::AnnotationStore::new(stam::Config::new());
3709 from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;
3710 let res = store.resource("test").expect("resource must have been created at this point");
3711 assert_eq!(res.text(), "TEST\n\nThis is a test.\n", "resource text");
3712 assert_eq!(store.annotations_len(), 7, "number of annotations");
3713 let annotation = store.annotation("emphasis").expect("annotation must have been created at this point");
3714 assert_eq!(annotation.text_simple(), Some("test"));
3715 let key = store.key("urn:stam-fromhtml", "style").expect("key must exist");
3717 assert_eq!(annotation.data().filter_key(&key).value_as_str(), Some("color:green"));
3718 let key = store.key("urn:stam-fromhtml", "title").expect("key must exist");
3719 let annotation = res.annotations_as_metadata().filter_key(&key).next().expect("annotation");
3720 assert_eq!(annotation.data().filter_key(&key).value_as_str(), Some("test"));
3721 let bodyannotation = store.annotation("body").expect("body annotation not found");
3722 let title1 = store.key("urn:stam-fromhtml", "title_from_parent").expect("key must exist");
3723 let title2 = store.key("urn:stam-fromhtml", "title_from_root").expect("key must exist");
3724 assert_eq!(bodyannotation.data().filter_key(&title1).value_as_str(), Some("test"));
3725 assert_eq!(bodyannotation.data().filter_key(&title2).value_as_str(), Some("test"));
3726 Ok(())
3727 }
3728
3729 #[test]
3730 fn test_full() -> Result<(), String> {
3731 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3732 let mut store = stam::AnnotationStore::new(stam::Config::new());
3733 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3734 let res = store.resource("test").expect("resource must have been created at this point");
3735 assert_eq!(res.text(), XMLEXAMPLE_TEXTOUTPUT, "resource text");
3736 Ok(())
3737 }
3738
3739 #[test]
3740 fn test_firstfruit() -> Result<(), String> {
3741 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3742 let mut store = stam::AnnotationStore::new(stam::Config::new());
3743 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3744 let bodyannotation = store.annotation("body").expect("body annotation not found");
3745 let fruit = store.key("urn:stam-fromhtml", "firstfruit").expect("key must exist");
3746 assert_eq!(bodyannotation.data().filter_key(&fruit).value_as_str(), Some("apple") );
3747 Ok(())
3748 }
3749
3750 #[test]
3751 fn test_fruits() -> Result<(), String> {
3752 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3753 let mut store = stam::AnnotationStore::new(stam::Config::new());
3754 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3755 let bodyannotation = store.annotation("body").expect("body annotation not found");
3756 let fruits = store.key("urn:stam-fromhtml", "fruits").expect("key must exist");
3757 assert_eq!(bodyannotation.data().filter_key(&fruits).value(), Some(&DataValue::List(vec!("apple".into(),"banana".into(),"melon".into()) )));
3758 Ok(())
3759 }
3760
3761 #[test]
3762 fn test_multifruits() -> Result<(), String> {
3763 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3764 let mut store = stam::AnnotationStore::new(stam::Config::new());
3765 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3766 let bodyannotation = store.annotation("body").expect("body annotation not found");
3767 let fruits = store.key("urn:stam-fromhtml", "multifruits").expect("key must exist");
3768 let results: Vec<_> = bodyannotation.data().filter_key(&fruits).collect();
3769 assert_eq!(results.len(), 3);
3770 assert_eq!(results.get(0).unwrap().value(),&DataValue::String("apple".to_string()) );
3771 assert_eq!(results.get(1).unwrap().value(),&DataValue::String("banana".to_string()) );
3772 assert_eq!(results.get(2).unwrap().value(),&DataValue::String("melon".to_string()) );
3773 Ok(())
3774 }
3775
3776 #[test]
3777 fn test_teispace() -> Result<(), String> {
3778 let config = XmlConversionConfig::from_toml_str(CONF)?;
3779 let mut store = stam::AnnotationStore::new(stam::Config::new());
3780 from_xml_in_memory("test", XMLTEISPACE, &config, &mut store)?;
3781 let res = store.resource("test").expect("resource must have been created at this point");
3782 assert_eq!(res.text(), "\n\n\n\n", "resource text");
3783 Ok(())
3784 }
3785
3786
3787 #[test]
3788 fn test_reqattrib() -> Result<(), String> {
3789 let config = XmlConversionConfig::from_toml_str(CONF)?;
3790 let mut store = stam::AnnotationStore::new(stam::Config::new());
3791 from_xml_in_memory("test", XMLREQATTRIBEXAMPLE, &config, &mut store)?;
3792 let res = store.resource("test").expect("resource must have been created at this point");
3793 assert_eq!(res.text(), "test", "resource text");
3794 let key = store.key("urn:stam-fromhtml", "requiredattrib").expect("key must exist");
3795 let annotation = store.annotation("ann1").expect("annotation");
3796 assert_eq!(annotation.data().filter_key(&key).value_as_str(), Some("blah"));
3797 assert!(store.key("urn:stam-fromhtml", "optattrib").is_none(), "optional attrib is unused");
3798 Ok(())
3799 }
3800
3801 #[test]
3802 fn test_reqattrib2() -> Result<(), String> {
3803 let mut config = XmlConversionConfig::from_toml_str(CONF)?;
3804 config = config.with_debug(true);
3805 let mut store = stam::AnnotationStore::new(stam::Config::new());
3806 assert!(from_xml_in_memory("test", XMLREQATTRIBEXAMPLE2, &config, &mut store).is_err(), "checking if error is returned");
3807 Ok(())
3808 }
3809
3810 #[test]
3811 fn test_reqattrib3() -> Result<(), String> {
3812 let config = XmlConversionConfig::from_toml_str(CONF)?;
3813 let mut store = stam::AnnotationStore::new(stam::Config::new());
3814 from_xml_in_memory("test", XMLREQATTRIBEXAMPLE3, &config, &mut store)?;
3815 let res = store.resource("test").expect("resource must have been created at this point");
3816 assert_eq!(res.text(), "test", "resource text");
3817 let reqkey = store.key("urn:stam-fromhtml", "requiredattrib").expect("key must exist");
3818 let optkey = store.key("urn:stam-fromhtml", "optattrib").expect("key optattrib must exist");
3819 let annotation = store.annotation("ann1").expect("annotation");
3820 assert_eq!(annotation.data().filter_key(&reqkey).value_as_str(), Some("blah"));
3821 assert_eq!(annotation.data().filter_key(&optkey).value_as_str(), Some("blah"));
3822 Ok(())
3823 }
3824
3825 #[test]
3826 fn test_map() -> Result<(), String> {
3827 let config = XmlConversionConfig::from_toml_str(CONF)?;
3828 let mut store = stam::AnnotationStore::new(stam::Config::new());
3829 from_xml_in_memory("test", XMLMAPEXAMPLE, &config, &mut store)?;
3830 let res = store.resource("test").expect("resource must have been created at this point");
3831 assert_eq!(res.text(), "test", "resource text");
3832 let key = store.key("urn:stam-fromhtml", "map").expect("key must exist");
3833 let annotation = store.annotation("ann1").expect("annotation");
3834 let data = annotation.data().filter_key(&key).value().expect("data must exist");
3835 if let DataValue::Map(data) = data {
3836 assert_eq!(data.get("text"), Some(&DataValue::String("test".into())));
3837 assert_eq!(data.get("number"), Some(&DataValue::Int(42)));
3838 assert_eq!(data.get("bogus"), Some(&DataValue::Bool(true)));
3839 assert_eq!(data.len(), 3);
3840 } else {
3841 assert!(false, "Data is supposed to be a map");
3842 }
3843 Ok(())
3844 }
3845
3846 #[test]
3847 fn test_metadata() -> Result<(), String> {
3848 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3849 let mut store = stam::AnnotationStore::new(stam::Config::new());
3850 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3851 let annotation = store.annotation("metadata").expect("annotation");
3852 let key = store.key("urn:stam-fromhtml", "author").expect("key must exist");
3853 let data = annotation.data().filter_key(&key).value().expect("data must exist");
3854 assert_eq!(data, &DataValue::String("proycon".into()));
3855 Ok(())
3856 }
3857
3858 #[test]
3859 fn test_datavalue_int() -> Result<(), String> {
3860 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3861 let mut store = stam::AnnotationStore::new(stam::Config::new());
3862 from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;
3863 let annotation = store.annotation("p1").expect("annotation not found");
3864 let key = store.key("urn:stam-fromhtml", "n").expect("key must exist");
3865 assert_eq!(annotation.data().filter_key(&key).value(), Some(&DataValue::Int(1)));
3866 Ok(())
3867 }
3868
3869 #[test]
3870 fn test_datavalue_string() -> Result<(), String> {
3871 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3872 let mut store = stam::AnnotationStore::new(stam::Config::new());
3873 from_xml_in_memory("test", XMLSMALLEXAMPLE, &config, &mut store)?;
3874 let annotation = store.annotation("p1").expect("annotation not found");
3875 let key = store.key("urn:stam-fromhtml", "nstring").expect("key must exist");
3876 assert_eq!(annotation.data().filter_key(&key).value(), Some(&DataValue::String("001".to_string())));
3877 Ok(())
3878 }
3879
3880 #[test]
3881 fn test_doubleslash_selector_root() -> Result<(), String> {
3882 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3883 let mut store = stam::AnnotationStore::new(stam::Config::new());
3884 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3885 assert!( store.annotation("h1").is_some());
3886 Ok(())
3887 }
3888
3889 #[test]
3890 fn test_doubleslash_selector_infix_none() -> Result<(), String> {
3891 let config = XmlConversionConfig::from_toml_str(CONF)?.with_debug(true);
3892 let mut store = stam::AnnotationStore::new(stam::Config::new());
3893 from_xml_in_memory("test", XMLEXAMPLE, &config, &mut store)?;
3894 assert!( store.annotation("h2").is_some());
3895 Ok(())
3896 }
3897
3898
3899}