stam 0.18.7

STAM is a powerful library for dealing with stand-off annotations on text. This is the Rust library.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
use datasize::{data_size, DataSize};
use minicbor::{Decode, Encode};
use sealed::sealed;
use serde::ser::{SerializeStruct, Serializer};
use serde::Serialize;
use std::path::PathBuf;

use crate::annotation::{Annotation, AnnotationHandle};
use crate::annotationdataset::{AnnotationDataSet, AnnotationDataSetHandle};
use crate::annotationstore::AnnotationStore;
use crate::config::Configurable;
use crate::error::StamError;
use crate::file::*;
use crate::json::{FromJson, ToJson};
use crate::resources::TextResource;
use crate::resources::TextResourceHandle;
use crate::store::*;
use crate::types::*;

/// Handle (internal numeric identifier) referring to an [`AnnotationSubStore`].
///
/// It wraps a `u16` and is encoded transparently in CBOR (`#[cbor(transparent)]`).
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, DataSize, Encode, Decode)]
#[cbor(transparent)]
pub struct AnnotationSubStoreHandle(#[n(0)] u16);

#[sealed]
impl Handle for AnnotationSubStoreHandle {
    fn new(intid: usize) -> Self {
        Self(intid as u16)
    }
    fn as_usize(&self) -> usize {
        self.0 as usize
    }
}

/// A substore is a sub-collection of annotations that is serialised as an independent AnnotationStore.
/// The actual contents are still defined and kept by the parent AnnotationStore.
/// This structure only holds references used for serialisation purposes.
#[derive(Debug, Encode, Decode, Default, PartialEq, Clone, DataSize)]
pub struct AnnotationSubStore {
    /// Internal numeric ID, corresponds with the index in `AnnotationStore::substores`, which has the ownership
    #[n(0)]
    intid: Option<AnnotationSubStoreHandle>,

    // The `#[n(..)]` attributes are field index numbers for CBOR binary (de)serialisation,
    // which itself does not allow stand-off files!
    /// Public identifier of this substore (if any)
    #[n(1)]
    pub(crate) id: Option<String>,

    /// Path associated with this substore (if any)
    #[n(2)]
    pub(crate) filename: Option<PathBuf>,

    #[n(3)]
    /// Refers to indices in substores, indicating which are the parents of the current substore.
    /// A value of `None` means the root store is the parent.
    /// This allows for deeper nesting; a first-level substore has `None` as its parent.
    pub(crate) parents: Vec<Option<AnnotationSubStoreHandle>>,

    /// Handles of the annotations assigned to this substore
    #[n(4)]
    pub(crate) annotations: Vec<AnnotationHandle>,
    /// Handles of the annotation datasets assigned to this substore
    #[n(5)]
    pub(crate) annotationsets: Vec<AnnotationDataSetHandle>,
    /// Handles of the text resources assigned to this substore
    #[n(6)]
    pub(crate) resources: Vec<TextResourceHandle>,
}

impl AnnotationSubStore {
    /// Returns the public ID of this substore (if any)
    pub fn id(&self) -> Option<&str> {
        self.id.as_deref()
    }

    /// Returns the filename associated with this substore (if any)
    pub fn filename(&self) -> Option<&PathBuf> {
        self.filename.as_ref()
    }

    /// Returns a lower-bound estimate of memory usage in bytes
    pub fn meminfo(&self) -> usize {
        data_size(self)
    }

    /// Returns the number of annotation handles held by this substore (deletions are not subtracted)
    pub fn annotations_len(&self) -> usize {
        self.annotations.len()
    }

    /// Returns the number of resource handles held by this substore (deletions are not subtracted)
    pub fn resources_len(&self) -> usize {
        self.resources.len()
    }

    /// Returns the number of dataset handles held by this substore (deletions are not subtracted)
    pub fn datasets_len(&self) -> usize {
        self.annotationsets.len()
    }

    /// Returns the parents of this substore.
    /// An entry of `None` means the root store is a parent.
    pub fn parents(&self) -> &Vec<Option<AnnotationSubStoreHandle>> {
        &self.parents
    }

    /// Builder variant of [`AnnotationSubStore::add_parent`]: appends a parent and returns the substore.
    /// May be chained multiple times to add multiple parents; `None` means the root store is the parent.
    pub fn with_parent(mut self, index: Option<AnnotationSubStoreHandle>) -> Self {
        self.parents.push(index);
        self
    }

    /// Appends a parent to this substore; may be called multiple times to add multiple parents.
    /// The value is wrapped in an option, `None` means the root store is the parent.
    pub fn add_parent(&mut self, index: Option<AnnotationSubStoreHandle>) {
        self.parents.push(index);
    }

    /// Replaces all parents of this substore with the given list
    pub fn with_parents(self, parents: Vec<Option<AnnotationSubStoreHandle>>) -> Self {
        Self { parents, ..self }
    }

    /// Sets the filename of this substore
    pub fn with_filename(self, filename: &str) -> Self {
        Self {
            filename: Some(PathBuf::from(filename)),
            ..self
        }
    }
}

#[sealed]
impl TypeInfo for AnnotationSubStore {
    /// Identifies this type as `Type::AnnotationSubStore`.
    fn typeinfo() -> Type {
        Type::AnnotationSubStore
    }
}

// Empty inherent impl block: it defines no methods.
impl AnnotationSubStore {}

//these I couldn't solve nicely using generics:

/// A handle is itself a valid request for an [`AnnotationSubStore`]: it resolves to itself.
impl Request<AnnotationSubStore> for AnnotationSubStoreHandle {
    /// Returns this handle unchanged. The store is not consulted, so no check is made
    /// that the handle actually exists in it.
    fn to_handle<'store, S>(&self, _store: &'store S) -> Option<AnnotationSubStoreHandle>
    where
        S: StoreFor<AnnotationSubStore>,
    {
        Some(*self)
    }
}

#[sealed]
impl Storable for AnnotationSubStore {
    type HandleType = AnnotationSubStoreHandle;
    type StoreHandleType = ();
    type FullHandleType = Self::HandleType;
    type StoreType = AnnotationStore;

    /// Returns the public ID of this substore (if any)
    fn id(&self) -> Option<&str> {
        self.id.as_deref()
    }

    /// Sets the public ID of this substore
    fn with_id(self, id: impl Into<String>) -> Self {
        Self {
            id: Some(id.into()),
            ..self
        }
    }

    /// Returns the internal handle, `None` if the substore has no handle assigned
    fn handle(&self) -> Option<Self::HandleType> {
        self.intid
    }

    /// Assigns the internal handle
    fn with_handle(self, handle: AnnotationSubStoreHandle) -> Self {
        Self {
            intid: Some(handle),
            ..self
        }
    }

    /// Substores carry a public ID
    fn carries_id() -> bool {
        true
    }

    /// The full handle is just the handle itself; the store handle is the unit type and is ignored
    fn fullhandle(
        _storehandle: Self::StoreHandleType,
        handle: Self::HandleType,
    ) -> Self::FullHandleType {
        handle
    }

    /// Merging is a no-op for substores: `_other` is dropped and `Ok(())` is returned
    fn merge(&mut self, _other: Self) -> Result<(), StamError> {
        Ok(())
    }

    /// Clears the internal handle
    fn unbind(self) -> Self {
        Self {
            intid: None,
            ..self
        }
    }
}

impl AnnotationStore {
    /// Adds another existing AnnotationStore as a stand-off dependency (uses the @include mechanism in STAM JSON)
    /// If you want to start a new substore that does not exist yet, use [`add_new_substore`] instead.
    pub fn add_substore(&mut self, filename: &str) -> Result<AnnotationSubStoreHandle, StamError> {
        if !self.substores.is_empty() {
            // check if the substore is already loaded (it may be referenced from multiple places)
            // in that case we don't need to process it again
            let foundpath = Some(get_filepath(filename, self.config.workdir())?);
            let mut foundsubstore = None;
            for substore in <Self as StoreFor<AnnotationSubStore>>::iter(self) {
                if substore.filename == foundpath || substore.filename == Some(filename.into()) {
                    foundsubstore = Some(substore.handle().expect("substore must have handle"));
                    break;
                }
            }
            if let Some(foundsubstore) = foundsubstore {
                let parent_handle = self.config.current_substore_path.last().copied();
                let substore: &mut AnnotationSubStore = self.get_mut(foundsubstore)?;
                substore.add_parent(parent_handle);
                return Ok(foundsubstore);
            }
        }

        let parent_handle = self.config.current_substore_path.last().copied();
        let handle = self.insert(AnnotationSubStore::default().with_parent(parent_handle))?; //this data will be modified whilst parsing
        debug(self.config(), || {
            format!(
                "AnnotationStore.add_substore: adding substore filename={:?}, parent={:?}",
                filename, parent_handle
            )
        });
        self.push_current_substore(handle);
        self.merge_json_file(filename)?;
        self.pop_current_substore();
        Ok(handle)
    }

    /// Adds a new AnnotationStore as a stand-off dependency (uses the @include mechanism in STAM JSON)
    /// It will be created from scratch (or overwritten!)
    /// If you want to add an already existing AnnotationStore as a substore, use [`add_substore`] instead.
    pub fn add_new_substore(
        &mut self,
        id: impl Into<String>,
        filename: &str,
    ) -> Result<AnnotationSubStoreHandle, StamError> {
        if !self.substores.is_empty() {
            // check if the substore is already loaded (it may be referenced from multiple places)
            // in that case we don't need to process it again
            let foundpath = Some(get_filepath(filename, self.config.workdir())?);
            let mut foundsubstore = None;
            for substore in <Self as StoreFor<AnnotationSubStore>>::iter(self) {
                if substore.filename == foundpath || substore.filename == Some(filename.into()) {
                    foundsubstore = Some(substore.handle().expect("substore must have handle"));
                    break;
                }
            }
            if let Some(foundsubstore) = foundsubstore {
                let parent_handle = self.config.current_substore_path.last().copied();
                let substore: &mut AnnotationSubStore = self.get_mut(foundsubstore)?;
                substore.add_parent(parent_handle);
                return Ok(foundsubstore);
            }
        }

        let parent_handle = self.config.current_substore_path.last().copied();
        let handle = self.insert(
            AnnotationSubStore::default()
                .with_id(id)
                .with_filename(filename)
                .with_parent(parent_handle),
        )?; //this data will be modified whilst parsing
        debug(self.config(), || {
            format!(
                "AnnotationStore.add_substore: adding substore filename={:?}, parent={:?}",
                filename, parent_handle
            )
        });
        Ok(handle)
    }

    /// used to add a substore to the path, indicating which substore is currently being parsed
    /// influences the behaviour of add_substore()
    fn push_current_substore(&mut self, index: AnnotationSubStoreHandle) {
        self.config.current_substore_path.push(index);
    }

    /// used to add a substore to the path, indicating which substore is currently being parsed
    /// influences the behaviour of add_substore()
    fn pop_current_substore(&mut self) -> bool {
        self.config.current_substore_path.pop().is_some()
    }
}

/// Implemented by stores that can assign items of type `T` to an [`AnnotationSubStore`].
pub trait AssociateSubStore<T>
where
    T: Storable,
{
    /// Assigns an item to a substore.
    /// Depending on the type of item, this can be either an exclusive assignment (one-to-one) or allow multiple (one-to-many).
    /// Annotations are always exclusive (one-to-one). Resources and datasets can be one-to-many if
    /// and only if they are stand-off (i.e. they have an associated filename and use the @include
    /// mechanism).
    /// If this is called on exclusive items, their old substore will be unassigned before the new one is assigned.
    fn associate_substore(
        &mut self,
        item: impl Request<T>,
        substore: impl Request<AnnotationSubStore>,
    ) -> Result<(), StamError>;
}

impl AssociateSubStore<Annotation> for AnnotationStore {
    /// Assigns an annotation to a substore. The relation is exclusive: if the annotation is
    /// already assigned to a substore, it is removed from that one first.
    ///
    /// Returns a `NotFoundError` if the annotation can not be resolved, or the error from
    /// the store if a substore can not be resolved.
    fn associate_substore(
        &mut self,
        item: impl Request<Annotation>,
        substore: impl Request<AnnotationSubStore>,
    ) -> Result<(), StamError> {
        let annotation = match item.to_handle(self) {
            Some(annotation) => annotation,
            None => {
                return Err(StamError::NotFoundError(
                    Type::Annotation,
                    item.requested_id_owned()
                        .unwrap_or("(ID not known at this point)".into()),
                ));
            }
        };

        // Exclusive relation (unlike resources/datasets that use @include):
        // detach the annotation from its previous substore, if it has one.
        if let Some(previous) = self.annotation_substore_map.get(annotation) {
            let old_substore = self.get_mut(previous)?;
            let old_position = old_substore
                .annotations
                .iter()
                .position(|candidate| *candidate == annotation);
            if let Some(index) = old_position {
                old_substore.annotations.remove(index);
            }
            self.annotation_substore_map.remove_all(annotation);
        }

        // Attach to the requested substore and record the relation in the reverse map.
        let target = self.get_mut(substore)?;
        let target_handle = target.handle().expect("substore must have handle");
        target.annotations.push(annotation);
        self.annotation_substore_map.insert(annotation, target_handle);
        Ok(())
    }
}

impl AssociateSubStore<TextResource> for AnnotationStore {
    /// Assigns a text resource to a substore.
    ///
    /// A stand-off resource (one with an associated filename) may belong to multiple substores.
    /// A resource without a filename is exclusive: it is removed from every substore it was
    /// previously assigned to before being assigned to the new one.
    ///
    /// Returns a `NotFoundError` if the resource can not be resolved, or the error from
    /// the store if a substore can not be resolved.
    fn associate_substore(
        &mut self,
        item: impl Request<TextResource>,
        substore: impl Request<AnnotationSubStore>,
    ) -> Result<(), StamError> {
        if let Some(handle) = item.to_handle(self) {
            let resource = self.get(handle)?;
            if resource.filename().is_none() {
                //the resource is not stand-off (it has no filename), so the relation is exclusive
                //check if the item is already assigned to a substore
                if let Some(substore_handles) = self.resource_substore_map.get(handle) {
                    // copy the handles so the map is no longer borrowed while substores are mutated
                    let substore_handles: Vec<_> = substore_handles.clone();
                    for substore_handle in substore_handles {
                        //then first remove it from the substore
                        let substore = self.get_mut(substore_handle)?;
                        if let Some(pos) = substore.resources.iter().position(|x| *x == handle) {
                            substore.resources.remove(pos);
                        }
                    }
                    self.resource_substore_map.remove_all(handle);
                }
            }

            let substore = self.get_mut(substore)?;
            let substore_handle = substore.handle().expect("substore must have handle");
            // avoid listing the same resource twice in one substore
            if !substore.resources.contains(&handle) {
                substore.resources.push(handle);
            }
            self.resource_substore_map.insert(handle, substore_handle);
            Ok(())
        } else {
            Err(StamError::NotFoundError(
                Type::TextResource,
                item.requested_id_owned()
                    .unwrap_or("(ID not known at this point)".into()),
            ))
        }
    }
}

impl AssociateSubStore<AnnotationDataSet> for AnnotationStore {
    /// Assigns an annotation dataset to a substore.
    ///
    /// A stand-off dataset (one with an associated filename) may belong to multiple substores.
    /// A dataset without a filename is exclusive: it is removed from every substore it was
    /// previously assigned to before being assigned to the new one.
    ///
    /// Returns a `NotFoundError` if the dataset can not be resolved, or the error from
    /// the store if a substore can not be resolved.
    fn associate_substore(
        &mut self,
        item: impl Request<AnnotationDataSet>,
        substore: impl Request<AnnotationSubStore>,
    ) -> Result<(), StamError> {
        if let Some(handle) = item.to_handle(self) {
            let dataset = self.get(handle)?;
            if dataset.filename().is_none() {
                //the dataset is not stand-off (it has no filename), so the relation is exclusive
                //check if the item is already assigned to a substore
                if let Some(substore_handles) = self.dataset_substore_map.get(handle) {
                    // copy the handles so the map is no longer borrowed while substores are mutated
                    let substore_handles: Vec<_> = substore_handles.clone();
                    for substore_handle in substore_handles {
                        //then first remove it from the substore
                        let substore = self.get_mut(substore_handle)?;
                        if let Some(pos) = substore.annotationsets.iter().position(|x| *x == handle)
                        {
                            substore.annotationsets.remove(pos);
                        }
                    }
                    self.dataset_substore_map.remove_all(handle);
                }
            }

            let substore = self.get_mut(substore)?;
            let substore_handle = substore.handle().expect("substore must have handle");
            // avoid listing the same dataset twice in one substore
            if !substore.annotationsets.contains(&handle) {
                substore.annotationsets.push(handle);
            }
            self.dataset_substore_map.insert(handle, substore_handle);
            Ok(())
        } else {
            Err(StamError::NotFoundError(
                Type::AnnotationDataSet,
                item.requested_id_owned()
                    .unwrap_or("(ID not known at this point)".into()),
            ))
        }
    }
}

// v-- these use some higher level API concepts (since we need a reference to the whole store for serialisation)

impl<'store> Serialize for ResultItem<'store, AnnotationSubStore> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut state = serializer.serialize_struct("AnnotationStore", 3)?;
        state.serialize_field("@type", "AnnotationStore")?;
        if let Some(id) = self.id() {
            state.serialize_field("@id", id)?;
        }
        let substores: Vec<_> = self.substores().collect();
        let substore_handle = self.handle();
        if !substores.is_empty() {
            if substores.len() == 1 {
                if let Some(substore) =
                    <AnnotationStore as StoreFor<AnnotationSubStore>>::iter(self.store())
                        .filter(|substore| substore.parents.contains(&Some(substore_handle)))
                        .next()
                {
                    state.serialize_field(
                        "@include",
                        substore.filename().ok_or(serde::ser::Error::custom(
                            "substore must have filename or can not be serialised",
                        ))?,
                    )?;
                }
            } else {
                let substores_filenames: Vec<_> =
                    <AnnotationStore as StoreFor<AnnotationSubStore>>::iter(self.store())
                        .filter(|substore| substore.parents.contains(&Some(substore_handle)))
                        .filter_map(|substore| substore.filename())
                        .collect();
                state.serialize_field("@include", &substores_filenames)?;
            }
        }
        let wrappedstore: WrappedStore<TextResource, AnnotationStore> =
            self.store().wrap_store(Some(substore_handle));
        state.serialize_field("resources", &wrappedstore)?;
        let wrappedstore: WrappedStore<AnnotationDataSet, AnnotationStore> =
            self.store().wrap_store(Some(substore_handle));
        state.serialize_field("annotationsets", &wrappedstore)?;
        let wrappedstore: WrappedStore<Annotation, AnnotationStore> =
            self.store().wrap_store(Some(substore_handle));
        state.serialize_field("annotations", &wrappedstore)?;
        state.end()
    }
}

impl<'store> ResultItem<'store, AnnotationSubStore> {
    /// Writes this substore to the file it is associated with.
    ///
    /// # Errors
    ///
    /// Returns a `StamError::SerializationError` if the substore has no filename, if the
    /// store's configured data format is not JSON, or if the filename is not valid UTF-8.
    /// Errors from the JSON serialisation itself are passed through.
    pub fn save(&self) -> Result<(), StamError> {
        let new_config = self.store().new_config();

        debug(self.store().config(), || {
            format!(
                "AnnotationSubStore.save: filename={:?}, workdir={:?}",
                self.as_ref().filename(),
                new_config.workdir()
            )
        });

        let filename = match self.as_ref().filename.as_ref() {
            Some(filename) => filename,
            None => {
                return Err(StamError::SerializationError(
                    "No filename associated with the store".to_owned(),
                ));
            }
        };

        match self.store().config().dataformat {
            DataFormat::Json { .. } => {
                // Report a non-UTF-8 path as an error rather than panicking in library code.
                let filename = filename.to_str().ok_or_else(|| {
                    StamError::SerializationError(
                        "Filename of the substore is not valid UTF-8".to_owned(),
                    )
                })?;
                self.to_json_file(filename, &new_config) //may produce 1 or multiple files
            }
            _ => Err(StamError::SerializationError(
                "Only JSON serialisation is supported for substores".to_owned(),
            )),
        }
    }
}

// Opts substore result items into `ToJson`. The impl body is empty, so every method
// (such as the `to_json_file` call in `save`) presumably comes from the trait's defaults.
impl<'store> ToJson for ResultItem<'store, AnnotationSubStore> {}