stam/
lib.rs

1/*
2    STAM Library (Stand-off Text Annotation Model)
3        by Maarten van Gompel <proycon@anaproy.nl>
4        Digital Infrastructure, KNAW Humanities Cluster
5
6        Licensed under the GNU General Public License v3
7
8        https://github.com/annotation/stam-rust
9*/
10
11//! ## Introduction
12//!
13//! STAM is a standalone data model for stand-off text annotation. This is a software library to work with the
14//! model from Rust, and is the primary library/reference implementation for STAM. It aims to
15//! implement the full model as per the [STAM specification](https://github.com/annotation/stam) and most of the
16//! extensions.
17//!
18//! **What can you do with this library?**
19//!
20//! * Keep, build and manipulate an efficient in-memory store of texts and annotations on texts
21//! * Search in annotations, data and text, either programmatically or via the [STAM Query Language](https://github.com/annotation/stam/tree/master/extensions/stam-query).
22//!    * Search annotations by data, textual content, relations between text fragments (overlap, embedding, adjacency, etc).
23//!    * Search in text (incl. via regular expressions) and find annotations targeting found text selections.
24//!    * Elementary text operations with regard for text offsets (splitting text on a delimiter, stripping text).
25//!    * Search in data (set,key,value) and find annotations that use the data.
26//!    * Convert between different kinds of offsets (absolute, relative to other structures, UTF-8 bytes vs unicode codepoints, etc.)
27//! * Read and write resources and annotations from/to STAM JSON, STAM CSV, or an optimised binary (CBOR) representation.
28//!     * The underlying [STAM model](https://github.com/annotation/stam) aims to be clear and simple. It is flexible and
29//!       does not commit to any vocabulary or annotation paradigm other than stand-off annotation.
30//!
31//! This STAM library is intended as a foundation upon which further applications
32//! can be built that deal with stand-off annotations on text. We implement all the
33//! low-level logic in dealing with this so you no longer have to and can focus on your
34//! actual application. The library is written with performance in mind.
35//!
36//! This is the root module for the STAM library. The STAM library consists of two APIs, a
37//! low-level API and a high-level API; the latter is of most interest to end users and is
38//! implemented in `api/*.rs`.
39//!
40//! ## Table of Contents (abridged)
41//!
42//! * [`AnnotationStore`] - The main annotation store that holds everything together.
43//! * **Result items:** - These encapsulate the underlying primary structures and are the main way in which things are returned throughout the high-level API.
44//!     * [`ResultItem<Annotation>`](struct.ResultItem.html#impl-ResultItem<'store,+Annotation>)
45//!     * [`ResultItem<AnnotationDataSet>`](struct.ResultItem.html#impl-ResultItem<'store,+AnnotationDataSet>)
46//!     * [`ResultItem<AnnotationData>`](struct.ResultItem.html#impl-ResultItem<'store,+AnnotationData>)
47//!     * [`ResultItem<DataKey>`](struct.ResultItem.html#impl-ResultItem<'store,+DataKey>)
48//!     * [`ResultItem<TextResource>`](struct.ResultItem.html#impl-ResultItem<'store,+TextResource>)
49//!     * [`ResultTextSelection`]
50//! * **Values and Operators:**
51//!     * [`DataValue`] - Encapsulates an actual value and its type.
52//!     * [`DataOperator`] - Defines a test done on a [`DataValue`]
53//!     * [`TextSelectionOperator`] - Performs a particular comparison of text selections (e.g. overlap, embedding, adjacency, etc.)
54//! * **Iterators:**
55//!     * [`AnnotationIterator`] - Iterator trait to iterate over annotations, typically produced by an `annotations()` method.
56//!     * [`DataIterator`] - Iterator trait to iterate over annotation data, typically produced by a `data()` method.
57//!     * [`TextSelectionIterator`] - iterator (trait), typically produced by a `textselections()` or `related_text()` method.
58//!     * [`ResourcesIterator`] - iterator (trait), typically produced by a `resources()` method.
59//!     * [`KeyIterator`] - iterator (trait), typically produced by a `keys()` method.
60//!     * [`TextIter`] - iterator over actual text, typically produced by a `text()` method.
61//! * **Text operations:**
62//!     * [`FindText`]  - Trait available on textresources and text selections to provide text-searching methods
63//!     * [`Text`] - Lower-level API trait to obtain text.
64//! * **Collections:**
65//!     * [`Annotations`] == [`Handles<Annotation>`] - Arbitrary collection of [`Annotation`] (by reference)
66//!     * [`Data`] == [`Handles<AnnotationData>`] - Arbitrary collection of [`AnnotationData`] (by reference)
67//!     * [`Resources`] ==  [`Handles<TextResource>`] - Arbitrary collection of [`TextResource`] (by reference).
68//!     * [`Keys`] == [`Handles<DataKey>`] - Arbitrary collection of [`DataKey`] (by reference).
69//! * **Querying:**
70//!     * [`Query`] - Holds a query, may be parsed from [STAMQL](https://github.com/annotation/stam/tree/master/extensions/stam-query).
71//!     * [`QueryResultItems`]
72//!     * [`QueryResultItem`]
73//! * **Referencing Text (both high and low-level API):**
74//!     * [`Cursor`] - Points to a text position, position may be relative.
75//!     * [`Offset`] - Range (two cursors) that can be used to select a text, positions may be relative.
76//! * **Primary structures (low level API)**:
77//!     * [`Annotation`]
78//!     * [`AnnotationDataSet`]
79//!     * [`AnnotationData`]
80//!     * [`TextSelection`]
81//!     * [`TextResource`]
82//!     * [`DataKey`]
83
84mod annotation;
85mod annotationdata;
86mod annotationdataset;
87mod annotationstore;
88mod api;
89mod cbor;
90mod config;
91mod datakey;
92mod datavalue;
93mod error;
94mod file;
95mod json;
96mod resources;
97mod selector;
98mod store;
99mod substore;
100mod text;
101mod textselection;
102mod types;
103
104#[cfg(feature = "csv")]
105mod csv;
106
107#[cfg(feature = "textvalidation")]
108mod textvalidation;
109
110// Our internal crate structure is not very relevant to the outside world,
111// expose all structs and traits in the root namespace, and be explicit about it:
112
113#[cfg(feature = "csv")]
114pub use crate::csv::{FromCsv, ToCsv};
115
116pub use annotation::{Annotation, AnnotationBuilder, AnnotationHandle, ReannotateMode};
117pub use annotationdata::{AnnotationData, AnnotationDataBuilder, AnnotationDataHandle};
118pub use annotationdataset::{AnnotationDataSet, AnnotationDataSetBuilder, AnnotationDataSetHandle};
119pub use annotationstore::AnnotationStore;
120pub use api::*;
121pub use config::{Config, Configurable};
122pub use datakey::{DataKey, DataKeyHandle};
123pub use datavalue::{DataOperator, DataValue};
124pub use error::StamError;
125pub use file::*;
126pub use json::{FromJson, ToJson};
127pub use resources::{
128    PositionMode, TextResource, TextResourceBuilder, TextResourceHandle, TextSelectionIter,
129};
130pub use selector::{
131    Offset, OffsetMode, Selector, SelectorBuilder, SelectorIter, SelectorKind, SelfSelector,
132};
133pub use store::{
134    generate_id, regenerate_id, BuildItem, IdStrategy, Request, ResultItem, StamResult, Storable,
135    Store, StoreFor,
136};
137pub use substore::{AnnotationSubStore, AnnotationSubStoreHandle, AssociateSubStore};
138pub use text::Text;
139pub use textselection::{
140    ResultTextSelection, ResultTextSelectionSet, TestTextSelection, TextSelection,
141    TextSelectionHandle, TextSelectionOperator, TextSelectionSet, TextSelectionSetIntoIter,
142    TextSelectionSetIter,
143};
144pub use types::*;
145
146pub use chrono::{DateTime, FixedOffset, Local, Utc};
147pub use regex::{Regex, RegexBuilder, RegexSet};
148
149mod tests;