1use crate::api::*;
2use crate::AnnotationDataSet;
3use crate::DataValue;
4use crate::Selector;
5use crate::TextResource;
6use chrono::Local;
7use smallvec::{smallvec, SmallVec};
8
9use nanoid::nanoid;
10use std::borrow::Cow;
11
/// URL of the W3C Web Annotation JSON-LD context; always emitted first in `@context`
/// and also recognised as a dataset ID holding annotation-level properties.
const CONTEXT_ANNO: &str = "http://www.w3.org/ns/anno.jsonld";
/// Web Annotation namespace; datasets with this ID are treated like `CONTEXT_ANNO`.
const NS_ANNO: &str = "http://www.w3.org/ns/anno/";
/// RDF syntax namespace; a `type` key in a dataset with this ID sets the body type.
const NS_RDF: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
15
/// Implemented by store items that can be identified by an IRI.
pub trait IRI<'store> {
    /// Returns the IRI for this item.
    ///
    /// `default_prefix` is the prefix the implementations in this file hand to
    /// `into_iri`, which prepends it when the item's public ID is not already an IRI.
    /// Most implementations here return `None` when the item has no public ID.
    fn iri(&self, default_prefix: &str) -> Option<Cow<'store, str>>;
}
22
23impl<'store> IRI<'store> for ResultItem<'store, DataKey> {
24 fn iri(&self, default_set_prefix: &str) -> Option<Cow<'store, str>> {
25 Some(into_iri(
26 self.id().expect("key must have an ID"),
27 &self
28 .set()
29 .iri(default_set_prefix)
30 .expect("set must have an ID"),
31 ))
32 }
33}
34
35impl<'store> IRI<'store> for ResultItem<'store, Annotation> {
36 fn iri(&self, default_prefix: &str) -> Option<Cow<'store, str>> {
37 self.id().map(|x| into_iri(x, default_prefix))
38 }
39}
40impl<'store> IRI<'store> for ResultItem<'store, TextResource> {
41 fn iri(&self, default_prefix: &str) -> Option<Cow<'store, str>> {
42 self.id().map(|x| into_iri(x, default_prefix))
43 }
44}
45impl<'store> IRI<'store> for ResultItem<'store, AnnotationDataSet> {
46 fn iri(&self, default_prefix: &str) -> Option<Cow<'store, str>> {
47 self.id().map(|x| into_iri(x, default_prefix))
48 }
49}
50
/// Returns `true` for the characters this module refuses inside an IRI:
/// space, tab, newline and the double quote.
fn invalid_in_iri(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '"')
}
55
56pub fn is_iri(s: &str) -> bool {
58 if let Some(pos) = s.find(":") {
59 if s.find(invalid_in_iri).is_some() {
60 return false;
61 }
62 let scheme = &s[..pos];
63 match scheme {
64 "http" | "https" | "urn" | "file" | "_" => true,
65 _ => false,
66 }
67 } else {
68 false
69 }
70}
71
72fn into_iri<'a>(s: &'a str, mut prefix: &str) -> Cow<'a, str> {
74 if is_iri(s) {
75 Cow::Borrowed(s)
76 } else {
77 if prefix.is_empty() {
78 prefix = "_:";
79 }
80 let separator = prefix.chars().last();
81 if separator == Some('/') || separator == Some('#') || separator == Some(':') {
82 Cow::Owned(format!(
83 "{}{}",
84 prefix,
85 s.replace(invalid_in_iri, "-").as_str()
86 ))
87 } else {
88 Cow::Owned(format!(
89 "{}/{}",
90 prefix,
91 s.replace(invalid_in_iri, "-").as_str()
92 ))
93 }
94 }
95}
96
97fn value_to_json(value: &DataValue) -> String {
98 match value {
99 DataValue::String(s) => format!("\"{}\"", s.replace("\n", "\\n").replace("\"", "\\\"")),
100 DataValue::List(l) => {
101 let mut json_out = "[".to_string();
102 for (i, value) in l.iter().enumerate() {
103 if i > 0 {
104 json_out.push(',');
105 }
106 json_out.push_str(&value_to_json(value));
107 }
108 json_out.push(']');
109 json_out
110 }
111 DataValue::Map(m) => {
112 let mut json_out = "{".to_string();
113 for (i, (key, value)) in m.iter().enumerate() {
114 if i > 0 {
115 json_out.push(',');
116 }
117 json_out.push_str(&format!("\"{}\": {}", key, value_to_json(value)));
118 }
119 json_out.push('}');
120 json_out
121 }
122 x => x.to_string(),
123 }
124}
125
/// Settings that steer the serialization of annotations to W3C Web Annotations.
#[derive(Clone, Debug)]
pub struct WebAnnoConfig {
    /// Prefix used to build an annotation IRI when the annotation's public ID is not
    /// already an IRI.
    pub default_annotation_iri: String,

    /// When set, a random ID is generated for annotations that have no public ID.
    pub generate_annotation_iri: bool,

    /// Prefix used to build a dataset IRI when the dataset's public ID is not
    /// already an IRI.
    pub default_set_iri: String,

    /// Prefix used to build a resource IRI when the resource's public ID is not
    /// already an IRI.
    pub default_resource_iri: String,

    /// Additional JSON-LD context URLs, emitted in `@context` after the Web
    /// Annotation context. Keys of datasets whose ID is listed here are written
    /// by their bare ID, and string values starting with one of these URLs
    /// followed by `/` or `#` are shortened to the remainder.
    pub extra_context: Vec<String>,

    /// Emit a `generated` timestamp (current local time) unless the annotation
    /// data already provides one.
    pub auto_generated: bool,

    /// Emit a `generator` entry describing this library unless the annotation
    /// data already provides one.
    pub auto_generator: bool,

    /// Namespace declarations for `@context`, stored as `(uri, prefix)` pairs.
    pub context_namespaces: Vec<(String, String)>,

    /// Templates for extra targets emitted alongside text selections. The
    /// placeholders `{resource_iri}`, `{resource}`, `{begin}` and `{end}` are
    /// substituted.
    pub extra_target_templates: Vec<String>,

    /// Do not emit the `@context` member.
    pub skip_context: bool,
}
164
165impl Default for WebAnnoConfig {
166 fn default() -> Self {
167 Self {
168 default_annotation_iri: "_:".to_string(),
169 generate_annotation_iri: false,
170 default_set_iri: "_:".to_string(),
171 default_resource_iri: "_:".to_string(),
172 extra_context: Vec::new(),
173 auto_generated: true,
174 auto_generator: true,
175 skip_context: false,
176 context_namespaces: Vec::new(),
177 extra_target_templates: Vec::new(),
178 }
179 }
180}
181
182impl WebAnnoConfig {
183 pub fn with_namespace(mut self, prefix: String, uri: String) -> Self {
184 self.context_namespaces.push((uri, prefix));
185 self
186 }
187
188 pub fn uri_to_namespace<'a>(&self, s: Cow<'a, str>) -> Cow<'a, str> {
190 for (uri_prefix, ns_prefix) in self.context_namespaces.iter() {
191 if s.starts_with(uri_prefix) {
192 return Cow::Owned(format!("{}:{}", ns_prefix, &s[uri_prefix.len()..]));
193 }
194 }
195 s
196 }
197
198 pub fn auto_extra_context(mut self, store: &AnnotationStore) -> Self {
200 for dataset in store.datasets() {
201 if let Some(dataset_id) = dataset.id() {
202 if (dataset_id.ends_with(".jsonld") || dataset_id.ends_with(".json"))
203 && is_iri(dataset_id)
204 {
205 if self.extra_context.iter().all(|x| x != dataset_id) {
206 self.extra_context.push(dataset_id.to_string());
207 }
208 }
209 }
210 }
211 self
212 }
213
214 pub fn serialize_context(&self) -> String {
216 let mut out = String::new();
217 if !self.extra_context.is_empty() || !self.context_namespaces.is_empty() {
218 out += "[ \"";
219 } else {
220 out += "\"";
221 }
222 out += CONTEXT_ANNO;
223 out += "\"";
224 for context in self.extra_context.iter() {
225 if context != CONTEXT_ANNO {
226 out += ", \"";
227 out += context;
228 out += "\"";
229 }
230 }
231 if !self.context_namespaces.is_empty() {
232 out += ", {";
233 for (i, (uri, namespace)) in self.context_namespaces.iter().enumerate() {
234 if i > 0 {
235 out += ", ";
236 }
237 out += "\"";
238 out += namespace;
239 out += "\": \"";
240 out += uri;
241 out += "\"";
242 }
243 out += "}";
244 }
245 if !self.extra_context.is_empty() || !self.context_namespaces.is_empty() {
246 out += " ]";
247 }
248 out
249 }
250}
251
252impl<'store> ResultItem<'store, Annotation> {
253 pub fn to_webannotation(&self, config: &WebAnnoConfig) -> String {
255 if let Selector::AnnotationDataSelector(..) | Selector::DataKeySelector(..) =
256 self.as_ref().target()
257 {
258 return String::new();
260 }
261 let mut ann_out = String::with_capacity(1024);
262 if config.skip_context {
263 ann_out += "{ "
264 } else {
265 ann_out += "{ \"@context\": ";
266 ann_out += &config.serialize_context();
267 ann_out += ",";
268 if let Some(iri) = self.iri(&config.default_annotation_iri) {
269 ann_out += &format!(" \"id\": \"{}\",", iri);
270 } else if config.generate_annotation_iri {
271 let id = nanoid!();
272 ann_out += &format!(
273 " \"id\": \"{}\",",
274 into_iri(&id, &config.default_annotation_iri)
275 )
276 }
277 }
278 ann_out += " \"type\": \"Annotation\",";
279
280 let mut suppress_default_body_type = false;
281 let mut suppress_body_id = false;
282 let mut suppress_auto_generated = false;
283 let mut suppress_auto_generator = false;
284 let mut target_extra_out = String::new();
285
286 let mut body_out = OutputMap::new();
287
288 let mut outputted_to_main = false;
289 for data in self.data() {
291 let key = data.key();
292 let key_id = key.id().expect("keys must have an ID");
293 match data.set().id() {
294 Some(CONTEXT_ANNO) | Some(NS_ANNO) => match key_id {
295 "generated" => {
296 if outputted_to_main {
297 ann_out.push(',');
298 }
299 suppress_auto_generated = true;
300 outputted_to_main = true;
301 ann_out += &output_predicate_datavalue(key_id, data.value(), config);
302 }
303 "generator" => {
304 if outputted_to_main {
305 ann_out.push(',');
306 }
307 suppress_auto_generator = true;
308 outputted_to_main = true;
309 ann_out += &output_predicate_datavalue(key_id, data.value(), config);
310 }
311 "motivation" | "created" | "creator" => {
312 if outputted_to_main {
313 ann_out.push(',');
314 }
315 outputted_to_main = true;
316 ann_out += &output_predicate_datavalue(key_id, data.value(), config);
317 }
318 "target" => {
319 if !target_extra_out.is_empty() {
320 target_extra_out.push(',');
321 }
322 target_extra_out += &value_to_json(data.value());
323 }
324 key_id => {
325 if key_id == "type"
327 || key_id == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
328 {
329 suppress_default_body_type = true; } else if key_id == "id" {
331 suppress_body_id = true;
332 }
333 body_out.add(
334 Cow::Borrowed(key_id),
335 output_datavalue(key_id, data.value(), config),
336 );
337 }
338 },
339 Some(NS_RDF) if key_id == "type" => {
340 suppress_default_body_type = true; body_out.add(
342 Cow::Borrowed(key_id),
343 output_datavalue(key_id, data.value(), config),
344 );
345 }
346 Some(set_id) => {
347 body_out.add(
349 config.uri_to_namespace(
350 if config.extra_context.iter().any(|s| s == set_id) {
351 key.id().expect("key must have ID").into()
353 } else {
354 key.iri(&config.default_set_iri).expect("set must have ID")
356 },
357 ),
358 output_datavalue(key_id, data.value(), config),
359 );
360 }
361 None => unreachable!("all sets should have a public identifier"),
362 }
363 }
364
365 if config.auto_generated && !suppress_auto_generated {
366 ann_out += &format!(" \"generated\": \"{}\",", Local::now().to_rfc3339());
367 }
368 if config.auto_generator && !suppress_auto_generator {
369 ann_out += " \"generator\": { \"id\": \"https://github.com/annotation/stam-rust\", \"type\": \"Software\", \"name\": \"STAM Library\" },";
370 }
371
372 if !body_out.is_empty() {
373 ann_out += " \"body\": {";
374 if !suppress_default_body_type {
375 ann_out += " \"type\": \"Dataset\",";
376 }
377 if !suppress_body_id {
378 if let Some(iri) = self.iri(&config.default_annotation_iri) {
379 ann_out += &format!(" \"id\": \"{}/body\",", iri);
380 } else if config.generate_annotation_iri {
381 let id = nanoid!();
382 ann_out += &format!(
383 " \"id\": \"{}\",",
384 into_iri(&id, &config.default_annotation_iri)
385 )
386 }
387 }
388 let l = body_out.len();
389 for (i, (key, value)) in body_out.iter().enumerate() {
390 ann_out += &format!("\"{}\": {}", key, value);
392 if i < l - 1 {
393 ann_out.push(',');
394 }
395 }
396 ann_out += "},";
397 }
398
399 let mut need_second_pass = false;
401 let output_selector_out = &output_selector(
402 self.as_ref().target(),
403 self.store(),
404 config,
405 false,
406 &mut need_second_pass,
407 false, );
409 if need_second_pass {
410 let second_pass_out = &output_selector(
411 self.as_ref().target(),
412 self.store(),
413 config,
414 false,
415 &mut need_second_pass,
416 true, );
418 if !target_extra_out.is_empty() {
419 ann_out += &format!(
421 " \"target\": [ {}, {}, {} ]",
422 output_selector_out, &second_pass_out, &target_extra_out
423 );
424 } else {
425 ann_out += &format!(
427 " \"target\": [ {}, {} ]",
428 output_selector_out, &second_pass_out
429 );
430 }
431 } else if !target_extra_out.is_empty() {
432 ann_out += &format!(
434 " \"target\": [ {}, {} ]",
435 &output_selector_out, &target_extra_out
436 );
437 } else {
438 ann_out += &format!(" \"target\": {}", &output_selector_out);
440 }
441 ann_out += "}";
442 ann_out
443 }
444}
445
446fn output_predicate_datavalue(
447 predicate: &str,
448 datavalue: &DataValue,
449 config: &WebAnnoConfig,
450) -> String {
451 let value_is_iri = if let DataValue::String(s) = datavalue {
452 is_iri(s)
453 } else {
454 false
455 };
456 if is_iri(predicate) && value_is_iri {
457 if let Some(s) = value_to_alias(predicate, true, datavalue, config, false) {
458 s
460 } else {
461 format!(
464 "\"{}\": {{ \"id\": \"{}\" }}",
465 config.uri_to_namespace(predicate.into()),
466 datavalue
467 )
468 }
469 } else if value_is_iri {
470 if let Some(s) = value_to_alias(predicate, false, datavalue, config, false) {
471 s
473 } else {
474 format!(
475 "\"{}\": {}",
476 config.uri_to_namespace(predicate.into()),
477 &value_to_json(datavalue)
478 )
479 }
480 } else {
481 format!(
482 "\"{}\": {}",
483 config.uri_to_namespace(predicate.into()),
484 &value_to_json(datavalue)
485 )
486 }
487}
488
489#[inline]
490fn value_to_alias(
493 predicate: &str,
494 predicate_is_iri: bool,
495 datavalue: &DataValue,
496 config: &WebAnnoConfig,
497 value_only: bool,
498) -> Option<String> {
499 if !config.extra_context.is_empty() {
500 if let DataValue::String(datavalue) = datavalue {
501 for prefix in config.extra_context.iter() {
502 if datavalue.starts_with(&format!("{}/", prefix.as_str()))
503 || datavalue.starts_with(&format!("{}#", prefix.as_str()))
504 {
505 if value_only {
506 return Some(format!("\"{}\"", &datavalue[prefix.len() + 1..]));
507 } else {
508 return Some(format!(
509 "\"{}\": \"{}\"",
510 if predicate_is_iri {
511 config.uri_to_namespace(predicate.into())
512 } else {
513 predicate.into()
514 },
515 &datavalue[prefix.len() + 1..]
516 ));
517 }
518 }
519 }
520 }
521 }
522 None
523}
524
525fn output_datavalue(predicate: &str, datavalue: &DataValue, config: &WebAnnoConfig) -> String {
526 let value_is_iri = if let DataValue::String(s) = datavalue {
527 is_iri(s)
528 } else {
529 false
530 };
531 if is_iri(predicate) && value_is_iri {
532 if let Some(s) = value_to_alias(predicate, true, datavalue, config, true) {
533 s
535 } else {
536 format!("{{ \"id\": \"{}\" }}", datavalue)
539 }
540 } else if value_is_iri {
541 if let Some(s) = value_to_alias(predicate, false, datavalue, config, true) {
542 s
544 } else {
545 value_to_json(datavalue)
546 }
547 } else {
548 value_to_json(datavalue)
549 }
550}
551
552fn output_selector(
553 selector: &Selector,
554 store: &AnnotationStore,
555 config: &WebAnnoConfig,
556 nested: bool,
557 need_second_pass: &mut bool,
558 second_pass: bool,
559) -> String {
560 let mut ann_out = String::new();
561 match selector {
562 Selector::TextSelector(res_handle, tsel_handle, _)
563 | Selector::AnnotationSelector(_, Some((res_handle, tsel_handle, _))) => {
564 let resource = store.resource(*res_handle).expect("resource must exist");
565 let textselection = resource
566 .as_ref()
567 .get(*tsel_handle)
568 .expect("text selection must exist");
569 if !second_pass {
570 if !config.extra_target_templates.is_empty() && !nested {
571 ann_out += "[";
572 }
573 ann_out += &format!(
574 "{{ \"source\": \"{}\", \"selector\": {{ \"type\": \"TextPositionSelector\", \"start\": {}, \"end\": {} }} }}",
575 into_iri(
576 resource.id().expect("resource must have ID"),
577 &config.default_resource_iri
578 ),
579 textselection.begin(),
580 textselection.end(),
581 );
582 }
583 if (!nested && !second_pass) || (nested && second_pass) {
584 for extra_target_template in config.extra_target_templates.iter() {
585 let mut template = extra_target_template.clone();
586 template = template.replace(
587 "{resource_iri}",
588 &into_iri(
589 resource.id().expect("resource must have ID"),
590 &config.default_resource_iri,
591 ),
592 );
593 template = template
594 .replace("{resource}", resource.id().expect("resource must have ID"));
595 template = template.replace("{begin}", &format!("{}", textselection.begin()));
596 template = template.replace("{end}", &format!("{}", textselection.end()));
597 if !ann_out.is_empty() {
598 ann_out.push(',');
599 }
600 ann_out += &format!("\"{}\"", &template);
601 }
602 if !nested && !second_pass && !config.extra_target_templates.is_empty() {
603 ann_out += " ]";
604 }
605 } else if !config.extra_target_templates.is_empty() && !second_pass {
606 *need_second_pass = true;
608 }
609 }
610 Selector::AnnotationSelector(a_handle, None) => {
611 let annotation = store.annotation(*a_handle).expect("annotation must exist");
612 if let Some(iri) = annotation.iri(&config.default_annotation_iri) {
613 ann_out += &format!("{{ \"id\": \"{}\", \"type\": \"Annotation\" }}", iri);
614 } else {
615 ann_out += "{ \"id\": null }";
616 eprintln!("WARNING: Annotation points to an annotation that has no public ID! Unable to serialize to Web Annotatations");
617 }
618 }
619 Selector::ResourceSelector(res_handle) => {
620 let resource = store.resource(*res_handle).expect("resource must exist");
621 ann_out += &format!(
622 "{{ \"id\": \"{}\", \"type\": \"Text\" }}",
623 into_iri(
624 resource.id().expect("resource must have ID"),
625 &config.default_resource_iri
626 ),
627 );
628 }
629 Selector::DataSetSelector(set_handle) => {
630 let dataset = store.dataset(*set_handle).expect("resource must exist");
631 ann_out += &format!(
632 "{{ \"id\": \"{}\", \"type\": \"Dataset\" }}",
633 into_iri(
634 dataset.id().expect("dataset must have ID"),
635 &config.default_resource_iri
636 ),
637 );
638 }
639 Selector::CompositeSelector(selectors) => {
640 ann_out += "{ \"type\": \"http://www.w3.org/ns/oa#Composite\", \"items\": [";
641 for (i, selector) in selectors.iter().enumerate() {
642 ann_out += &format!(
643 "{}",
644 &output_selector(selector, store, config, true, need_second_pass, second_pass)
645 );
646 if i != selectors.len() - 1 {
647 ann_out += ",";
648 }
649 }
650 ann_out += " ]}";
651 }
652 Selector::MultiSelector(selectors) => {
653 ann_out += "{ \"type\": \"http://www.w3.org/ns/oa#Independents\", \"items\": [";
654 for (i, selector) in selectors.iter().enumerate() {
655 ann_out += &format!(
656 "{}",
657 &output_selector(selector, store, config, true, need_second_pass, second_pass)
658 );
659 if i != selectors.len() - 1 {
660 ann_out += ",";
661 }
662 }
663 ann_out += " ]}";
664 }
665 Selector::DirectionalSelector(selectors) => {
666 ann_out += "{ \"type\": \"http://www.w3.org/ns/oa#List\", \"items\": [";
667 for (i, selector) in selectors.iter().enumerate() {
668 ann_out += &format!(
669 "{}",
670 &output_selector(selector, store, config, true, need_second_pass, second_pass)
671 );
672 if i != selectors.len() - 1 {
673 ann_out += ",";
674 }
675 }
676 ann_out += " ]}";
677 }
678 Selector::DataKeySelector(..) | Selector::AnnotationDataSelector(..) => {
679 if nested {
680 eprintln!("WARNING: DataKeySelector and AnnotationDataSelectors can not be serialized to Web Annotation, skipping!!");
681 } else {
682 unreachable!("DataKeySelector and AnnotationDataSelectors can not be serialized to Web Annotation (was tested earlier)");
683 }
684 }
685 Selector::RangedTextSelector { .. } | Selector::RangedAnnotationSelector { .. } => {
686 if nested {
687 let subselectors: Vec<_> = selector.iter(store, false).collect();
688 for (i, subselector) in subselectors.iter().enumerate() {
689 ann_out += &format!(
690 "{}",
691 &output_selector(
692 &subselector,
693 store,
694 config,
695 true,
696 need_second_pass,
697 second_pass
698 )
699 );
700 if i != subselectors.len() - 1 {
701 ann_out += ",";
702 }
703 }
704 } else {
705 unreachable!(
706 "Internal Ranged selectors can not be serialized directly, they can be serialized only when under a complex selector",
707 );
708 }
709 }
710 }
711 ann_out
712}
713
714#[derive(Default)]
716struct OutputMap<'a>(Vec<(Cow<'a, str>, SmallVec<[String; 1]>)>);
717
718impl<'a> OutputMap<'a> {
719 fn new() -> Self {
720 Self::default()
721 }
722
723 fn add(&mut self, key: Cow<'a, str>, value: String) {
724 let mut value = Some(value);
725 for item in self.0.iter_mut() {
726 if item.0 == key {
727 item.1.push(value.take().unwrap());
728 break;
729 }
730 }
731 if let Some(value) = value {
732 self.0.push((key, smallvec!(value)));
733 }
734 }
735
736 fn is_empty(&self) -> bool {
737 self.0.is_empty()
738 }
739
740 fn len(&self) -> usize {
741 self.0.len()
742 }
743
744 fn iter(&self) -> impl Iterator<Item = (&str, String)> {
745 self.0.iter().map(|(key, value)| {
746 (
747 key.as_ref(),
748 if value.len() == 1 {
749 let s: String = value.join(", ");
750 s
751 } else {
752 format!("[ {} ]", value.join(", ")) },
754 )
755 })
756 }
757}