1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
/*
STAM Library (Stand-off Text Annotation Model)
by Maarten van Gompel <proycon@anaproy.nl>
Digital Infrastructure, KNAW Humanities Cluster
Licensed under the GNU General Public License v3
https://github.com/annotation/stam-rust
*/
//! This module contains the high-level API for the [`AnnotationStore`]. This API is implemented
//! directly on the store itself. The high-level API is characterised by returning items as
//! [`ResultItem<T>`], upon which further object-specific API methods are implemented.
use crate::annotation::Annotation;
use crate::annotationdata::AnnotationData;
use crate::annotationdataset::{AnnotationDataSet, AnnotationDataSetHandle};
use crate::annotationstore::AnnotationStore;
use crate::datakey::{DataKey, DataKeyHandle};
use crate::datavalue::DataOperator;
use crate::resources::TextResource;
use crate::store::*;
use crate::ResultTextSelection;
use crate::{api::*, AnnotationSubStore};
use std::collections::BTreeSet;
impl AnnotationStore {
/// Looks up a single [`TextResource`] in the store and returns it by reference.
///
/// The `request` parameter encapsulates some kind of identifier: it can be a `&str`,
/// a `String` or a [`crate::TextResourceHandle`].
///
/// On success the resource is wrapped in a fat pointer ([`ResultItem<TextResource>`]),
/// which exposes the high-level API. Returns `None` if the lookup fails.
pub fn resource<'a>(
    &'a self,
    request: impl Request<TextResource>,
) -> Option<ResultItem<'a, TextResource>> {
    match self.get(request) {
        Ok(resource) => Some(resource.as_resultitem(self, self)),
        // The lookup error is deliberately discarded; callers only see `None`.
        Err(_) => None,
    }
}
/// Looks up a single [`AnnotationDataSet`] in the store and returns it by reference.
///
/// The `request` parameter encapsulates some kind of identifier: it can be a `&str`,
/// a [`String`] or an [`AnnotationDataSetHandle`].
///
/// On success the dataset is wrapped in a [`ResultItem<AnnotationDataSet>`];
/// `None` is returned if the lookup fails.
pub fn dataset<'a>(
    &'a self,
    request: impl Request<AnnotationDataSet>,
) -> Option<ResultItem<'a, AnnotationDataSet>> {
    self.get(request)
        .ok()
        .map(|dataset| dataset.as_resultitem(self, self))
}
/// Looks up a single [`DataKey`] inside a given [`AnnotationDataSet`] and returns it by reference.
///
/// * `set` - identifies the dataset the key belongs to
/// * `key` - identifies the key within that dataset
///
/// Returns `None` when either the dataset or the key cannot be found.
pub fn key<'a>(
    &'a self,
    set: impl Request<AnnotationDataSet>,
    key: impl Request<DataKey>,
) -> Option<ResultItem<'a, DataKey>> {
    // Resolve the dataset first, then let the dataset resolve the key.
    self.dataset(set).and_then(|dataset| dataset.key(key))
}
/// Looks up a single [`AnnotationData`] inside a given [`AnnotationDataSet`] and returns it by reference.
///
/// * `set` - identifies the dataset the data belongs to
/// * `data` - identifies the data item within that dataset
///
/// Returns `None` when either the dataset or the data item cannot be found.
pub fn annotationdata<'a>(
    &'a self,
    set: impl Request<AnnotationDataSet>,
    data: impl Request<AnnotationData>,
) -> Option<ResultItem<'a, AnnotationData>> {
    // Bail out with `None` if the dataset is unknown, otherwise delegate to it.
    let dataset = self.dataset(set)?;
    dataset.annotationdata(data)
}
/// Requests a specific [`TextSelection`] by handle (pertaining to a [`TextResource`]) to be
/// returned by reference.
///
/// * `resource` - identifies the text resource that holds the text selection
/// * `handle` - the handle of the text selection within that resource
///
/// Returns `None` if the resource does not exist or if the resource cannot resolve the handle.
pub fn textselection<'a>(
    &'a self,
    resource: impl Request<TextResource>,
    handle: TextSelectionHandle,
) -> Option<ResultTextSelection<'a>> {
    // `?` returns `None` early when the resource is unknown; the lookup error
    // for the handle itself is discarded in favour of `None` as well.
    self.resource(resource)?
        .textselection_by_handle(handle)
        .ok()
}
/// Looks up a single [`Annotation`] in the store and returns it by reference.
///
/// The `request` parameter encapsulates some kind of identifier: it can be a `&str`,
/// a [`String`] or an [`AnnotationHandle`](crate::AnnotationHandle).
///
/// On success the annotation is wrapped in a fat pointer ([`ResultItem<Annotation>`]),
/// which exposes the high-level API. Returns `None` if the lookup fails.
pub fn annotation<'a>(
    &'a self,
    request: impl Request<Annotation>,
) -> Option<ResultItem<'a, Annotation>> {
    let found: &Annotation = self.get(request).ok()?;
    Some(found.as_resultitem(self, self))
}
/// Looks up a single [`AnnotationSubStore`] in the store and returns it by reference.
///
/// The `request` parameter encapsulates some kind of identifier: it can be a `&str`,
/// a [`String`] or an [`AnnotationSubStoreHandle`].
///
/// On success the substore is wrapped in a [`ResultItem<AnnotationSubStore>`];
/// `None` is returned if the lookup fails.
pub fn substore<'a>(
    &'a self,
    request: impl Request<AnnotationSubStore>,
) -> Option<ResultItem<'a, AnnotationSubStore>> {
    match self.get(request) {
        Ok(substore) => Some(substore.as_resultitem(self, self)),
        Err(_) => None,
    }
}
/// Iterates over all text resources ([`TextResource`] instances) in the store.
///
/// Each item is yielded as a fat pointer ([`ResultItem<TextResource>`]),
/// which exposes the high-level API.
pub fn resources<'a>(
    &'a self,
) -> ResultIter<impl Iterator<Item = ResultItem<'a, TextResource>>> {
    let wrapped = self
        .iter()
        .map(|resource: &TextResource| resource.as_resultitem(self, self));
    ResultIter::new_sorted(wrapped)
}
/// Returns an iterator over all text resources ([`TextResource`] instances) in the root store.
///
/// Items are returned as a fat pointer ([`ResultItem<TextResource>`]),
/// which exposes the high-level API.
///
/// Unlike [`Self::resources()`], this will not return resources from substores: only
/// resources that have no entry in the store's resource-to-substore map are yielded.
pub fn resources_no_substores<'a>(
    &'a self,
) -> ResultIter<impl Iterator<Item = ResultItem<'a, TextResource>>> {
    let in_root_store = self
        .iter()
        .map(|resource: &TextResource| resource.as_resultitem(self, self))
        // Keep only resources that are not associated with any substore.
        .filter(|resource| {
            self.resource_substore_map
                .get(resource.handle())
                .is_none()
        });
    ResultIter::new_sorted(in_root_store)
}
/// Iterates over all [`AnnotationDataSet`] instances in the store.
///
/// Each item is yielded as a fat pointer ([`ResultItem<AnnotationDataSet>`]),
/// which exposes the high-level API.
pub fn datasets<'a>(
    &'a self,
) -> ResultIter<impl Iterator<Item = ResultItem<'a, AnnotationDataSet>>> {
    let wrapped = self
        .iter()
        .map(|dataset: &AnnotationDataSet| dataset.as_resultitem(self, self));
    ResultIter::new_sorted(wrapped)
}
/// Iterates over all [`AnnotationDataSet`] instances in the root store.
///
/// Each item is yielded as a fat pointer ([`ResultItem<AnnotationDataSet>`]),
/// which exposes the high-level API.
///
/// Unlike [`Self::datasets()`], datasets from substores are skipped: only datasets
/// that have no entry in the store's dataset-to-substore map are yielded.
pub fn datasets_no_substores<'a>(
    &'a self,
) -> ResultIter<impl Iterator<Item = ResultItem<'a, AnnotationDataSet>>> {
    let in_root_store = self
        .iter()
        .map(|dataset: &AnnotationDataSet| dataset.as_resultitem(self, self))
        // Keep only datasets that are not associated with any substore.
        .filter(|dataset| {
            self.dataset_substore_map
                .get(dataset.handle())
                .is_none()
        });
    ResultIter::new_sorted(in_root_store)
}
/// Iterates over all annotations ([`Annotation`] instances) in the store.
///
/// Each item is yielded as a fat pointer ([`ResultItem<Annotation>`]),
/// which exposes the high-level API.
///
/// Note that this includes all annotations from all substores. If you want only the
/// annotations pertaining to the root store, use [`Self::annotations_no_substores()`] instead.
pub fn annotations<'a>(
    &'a self,
) -> ResultIter<impl Iterator<Item = ResultItem<'a, Annotation>>> {
    let wrapped = self
        .iter()
        .map(|annotation: &'a Annotation| annotation.as_resultitem(self, self));
    ResultIter::new_sorted(wrapped)
}
/// Iterates over all annotations ([`Annotation`] instances) in the root store.
///
/// Each item is yielded as a fat pointer ([`ResultItem<Annotation>`]),
/// which exposes the high-level API.
///
/// Unlike [`Self::annotations()`], annotations from substores are skipped: only annotations
/// that have no entry in the store's annotation-to-substore map are yielded.
pub fn annotations_no_substores<'a>(
    &'a self,
) -> ResultIter<impl Iterator<Item = ResultItem<'a, Annotation>>> {
    let in_root_store = self
        .iter()
        .map(|annotation: &'a Annotation| annotation.as_resultitem(self, self))
        // Keep only annotations that are not associated with any substore.
        .filter(|annotation| {
            self.annotation_substore_map
                .get(annotation.handle())
                .is_none()
        });
    ResultIter::new_sorted(in_root_store)
}
/// Iterates over the substores ([`AnnotationSubStore`] instances) in the store.
///
/// Each item is yielded as a fat pointer ([`ResultItem<AnnotationSubStore>`]),
/// which exposes the high-level API. Only substores whose `parents` list contains `None`
/// are yielded. Note that each substore may itself consist of substores!
/// If you want a flattened representation, use [`Self::substores_flatten()`] instead.
pub fn substores<'a>(
    &'a self,
) -> ResultIter<impl Iterator<Item = ResultItem<'a, AnnotationSubStore>>> {
    ResultIter::new_sorted(self.iter().filter_map(|substore: &'a AnnotationSubStore| {
        substore
            .parents
            .contains(&None)
            .then(|| substore.as_resultitem(self, self))
    }))
}
/// Iterates over all substores ([`AnnotationSubStore`] instances) in the store,
/// including substores that are nested in others.
///
/// Each item is yielded as a fat pointer ([`ResultItem<AnnotationSubStore>`]),
/// which exposes the high-level API.
pub fn substores_flatten<'a>(
    &'a self,
) -> ResultIter<impl Iterator<Item = ResultItem<'a, AnnotationSubStore>>> {
    let wrapped = self
        .iter()
        .map(|substore: &'a AnnotationSubStore| substore.as_resultitem(self, self));
    ResultIter::new_sorted(wrapped)
}
/// Internal helper that resolves a set/key request pair to handles for data searches.
///
/// In the returned tuple, `None` for a handle means 'any' (no constraint on that part).
/// The outer `Option` is `None` when a specifically requested set or key does not exist,
/// in which case a search can not match anything at all.
///
/// If a key is requested without a set, a warning is printed to standard error and
/// the key is ignored.
pub(crate) fn find_data_request_resolver<'store>(
    &'store self,
    set: impl Request<AnnotationDataSet>,
    key: impl Request<DataKey>,
) -> Option<(Option<AnnotationDataSetHandle>, Option<DataKeyHandle>)> {
    if set.any() {
        if !key.any() {
            // Not the most elegant solution, but this is not wrapped in a Result<> and
            // we do not want to be entirely silent about this error either:
            eprintln!("STAM warning: Providing a key without a set in data searches is invalid! Key will be ignored!");
        }
        return Some((None, None));
    }

    // A specific set was requested; if it doesn't exist we bail out early.
    let dataset: &AnnotationDataSet = self.get(set).ok()?;
    let set_handle = dataset.handle().expect("set must have handle");

    let key_handle = if key.any() {
        None
    } else {
        // A specific key was requested; if it doesn't exist in the set we bail out early.
        Some(key.to_handle(dataset)?)
    };

    Some((Some(set_handle), key_handle))
}
/// Finds [`AnnotationData`] using data search criteria.
/// This returns an iterator over all matches.
///
/// If you are not interested in returning the results but merely testing the presence of particular data,
/// then use [`Self::test_data()`] instead.
///
/// You can pass a boolean (true/false, doesn't matter) or empty string literal for `set` or `key` to represent *any* set/key.
/// To search for any value, `value` must be explicitly set to [`DataOperator::Any`] to return all values.
///
/// Value is a DataOperator that can apply a data test to the value. Use [`DataOperator::Equals`] to search
/// for an exact value. As a shortcut, you can pass `"value".into()` to automatically convert various data types into
/// [`DataOperator::Equals`].
///
/// Example call to retrieve all data indiscriminately: `store.find_data(false, false, DataOperator::Any)`
/// .. or just use the alias function [`Self::data()`].
///
/// Note: If you pass a `key` you must also pass `set`, otherwise the key will be ignored!! You can not
/// search for keys if you don't know their set!
///
/// If a specific `set` is requested but does not exist, the returned iterator is empty.
///
/// ## Example
///
/// ```
/// # use stam::*;
/// # fn main() -> Result<(),StamError> {
/// # let store = AnnotationStore::default()
/// #   .with_id("example")
/// #   .with_resource(
/// #       TextResourceBuilder::new().with_id("myresource").with_text("Hello world")
/// #   )?
/// #   .with_dataset(
/// #       AnnotationDataSetBuilder::new().with_id("mydataset")
/// #   )?
/// #   .with_annotation(
/// #       AnnotationBuilder::new()
/// #           .with_id("A1")
/// #           .with_target(SelectorBuilder::textselector(
/// #               "myresource",
/// #               Offset::simple(6, 11),
/// #           ))
/// #           .with_data_with_id("mydataset", "part-of-speech", "noun", "D1"),
/// #   )?;
/// //in this store we have a single annotation, and single annotation data with key 'part-of-speech' and value 'noun':
/// for annotationdata in store.find_data("mydataset", "part-of-speech", DataOperator::Equals("noun".into())) {
///     assert_eq!(annotationdata.id(), Some("D1"));
///     assert_eq!(annotationdata.value(), "noun");
/// }
/// # Ok(())
/// # }
/// ```
///
pub fn find_data<'store, 'q>(
    &'store self,
    set: impl Request<AnnotationDataSet>,
    key: impl Request<DataKey>,
    value: DataOperator<'q>,
) -> Box<dyn Iterator<Item = ResultItem<'store, AnnotationData>> + 'store>
where
    'q: 'store,
{
    if !set.any() {
        // A specific set was requested: delegate to that dataset, or yield
        // nothing at all if it does not exist.
        return match self.dataset(set) {
            Some(dataset) => dataset.find_data(key, value),
            None => Box::new(std::iter::empty()),
        };
    }

    // From here on the search spans all datasets. The resolver is still consulted
    // because it emits the warning when a key was supplied without a set.
    if let Some((_, key_handle)) = self.find_data_request_resolver(set, key) {
        let rootstore = self;
        let iter: Box<dyn Iterator<Item = ResultItem<'store, AnnotationData>>> =
            Box::new(self.datasets().flat_map(move |dataset| {
                let dataset_ref: &'store AnnotationDataSet = dataset.as_ref();
                dataset_ref
                    .data()
                    // `None` as key handle means 'any key'.
                    .filter(move |annotationdata| {
                        key_handle.map_or(true, |wanted| wanted == annotationdata.key())
                    })
                    .map(move |annotationdata| {
                        annotationdata.as_resultitem(dataset_ref, rootstore)
                    })
            }));
        if let DataOperator::Any = value {
            // No value constraint: skip the value filter entirely.
            iter
        } else {
            Box::new(iter.filter_value(value))
        }
    } else {
        Box::new(std::iter::empty())
    }
}
/// Iterates over all data in all sets.
///
/// This is an alias for [`Self::find_data()`] without any constraint on set, key or value.
/// If possible, use a more constrained method (on [`AnnotationDataSet`] or a [`DataKey`]);
/// it will have better performance.
pub fn data<'store>(
    &'store self,
) -> Box<dyn Iterator<Item = ResultItem<'store, AnnotationData>> + 'store> {
    // A boolean passed as set or key represents 'any' set/key.
    let (any_set, any_key) = (false, false);
    self.find_data(any_set, any_key, DataOperator::Any)
}
/// Iterates over all keys in all sets.
///
/// The keys of every dataset are first collected into a [`BTreeSet`], and the
/// iterator over that set is returned.
/// If possible, use a more constrained method (on [`AnnotationDataSet`]); it will have better performance.
pub fn keys<'store>(
    &'store self,
) -> <BTreeSet<ResultItem<'store, DataKey>> as IntoIterator>::IntoIter {
    let collected: BTreeSet<ResultItem<'store, DataKey>> = self
        .datasets()
        .flat_map(|dataset| dataset.keys())
        .collect();
    collected.into_iter()
}
/// Tests whether certain annotation data exists, returning a boolean.
/// If you want to actually retrieve the data, use [`Self::find_data()`] instead.
///
/// You can pass a boolean (true/false, doesn't matter) or empty string literal for `set` or `key` to represent *any* set/key.
/// To test for any value, `value` must be explicitly set to [`DataOperator::Any`].
///
/// Note: This gives no guarantee that data, although it exists, is actually used by annotations.
pub fn test_data<'store, 'a>(
    &'store self,
    set: impl Request<AnnotationDataSet>,
    key: impl Request<DataKey>,
    value: DataOperator<'a>,
) -> bool
where
    'a: 'store,
{
    let candidates = self.find_data(set, key, value);
    candidates.test()
}
/// Searches for resources by metadata.
/// Returns an iterator yielding both the resource and the matching annotation data.
///
/// This may return the same resource multiple times if different matching data references it!
///
/// If you already have a `ResultItem<AnnotationData>` instance, just use `ResultItem<AnnotationData>.resources_as_metadata()` instead, it'll be much more efficient.
///
/// See [`Self::find_data()`] for further parameter explanation.
pub fn resources_by_metadata<'store, 'a>(
    &'store self,
    set: impl Request<AnnotationDataSet>,
    key: impl Request<DataKey>,
    value: DataOperator<'a>,
) -> impl Iterator<
    Item = (
        ResultItem<'store, TextResource>,
        ResultItem<'store, AnnotationData>,
    ),
>
where
    'a: 'store,
{
    self.find_data(set, key, value).flat_map(|data| {
        // Pair every resource with (a clone of) the data item that matched.
        data.resources_as_metadata()
            .into_iter()
            .map(move |resource| (resource, data.clone()))
    })
}
/// Searches for datasets by metadata.
/// Returns an iterator yielding both the dataset and the matching annotation data.
///
/// This may return the same dataset multiple times if different matching data references it!
///
/// If you already have a `ResultItem<AnnotationData>` instance, just use `ResultItem<AnnotationData>.datasets()` instead, it'll be much more efficient.
///
/// See [`Self::find_data()`] for further parameter explanation.
pub fn datasets_by_metadata<'store, 'a>(
    &'store self,
    set: impl Request<AnnotationDataSet>,
    key: impl Request<DataKey>,
    value: DataOperator<'a>,
) -> impl Iterator<
    Item = (
        ResultItem<'store, AnnotationDataSet>,
        ResultItem<'store, AnnotationData>,
    ),
>
where
    'a: 'store,
{
    self.find_data(set, key, value).flat_map(|data| {
        // Pair every dataset with (a clone of) the data item that matched.
        data.datasets()
            .into_iter()
            .map(move |dataset| (dataset, data.clone()))
    })
}
}