stam/
types.rs

1/*
2    STAM Library (Stand-off Text Annotation Model)
3        by Maarten van Gompel <proycon@anaproy.nl>
        Digital Infrastructure, KNAW Humanities Cluster
5
6        Licensed under the GNU General Public License v3
7
8        https://github.com/annotation/stam-rust
9*/
10
11//! This module implements some common types that are found throughout the API, both low and high-level.
12
13use datasize::DataSize;
14use sealed::sealed;
15use std::hash::Hash;
16
17use minicbor::{Decode, Encode};
18use serde::{Deserialize, Serialize};
19
20use crate::config::Config;
21use crate::error::StamError;
22
/// A cursor points to a specific position in a text and is used to select offsets.
/// Units are unicode codepoints (not bytes!) and are 0-indexed.
///
/// The cursor can be either begin-aligned or end-aligned. `BeginAligned(0)` refers to the
/// first unicode codepoint in a referenced text, whereas `EndAligned(0)` refers to the
/// position at the very end of the text (the last character spans `EndAligned(-1)` to
/// `EndAligned(0)`).
///
/// With serde the variant is stored in an `@type` field (`BeginAlignedCursor` or
/// `EndAlignedCursor`) and the offset in a `value` field.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, DataSize, Encode, Decode)]
#[serde(tag = "@type", content = "value")]
pub enum Cursor {
    /// Cursor relative to the start of a text. Has a value of 0 or higher.
    #[serde(rename = "BeginAlignedCursor")]
    #[n(0)] // the `n(..)` attributes are field index numbers for CBOR binary (de)serialisation
    BeginAligned(#[n(0)] usize),

    /// Cursor relative to the end of a text. Has a value of 0 or lower. The last character of a
    /// text begins at `Cursor::EndAligned(-1)` and ends at `Cursor::EndAligned(0)`.
    #[serde(rename = "EndAlignedCursor")]
    #[n(1)]
    EndAligned(#[n(0)] isize),
}
42
43impl From<usize> for Cursor {
44    fn from(cursor: usize) -> Self {
45        Self::BeginAligned(cursor)
46    }
47}
48
49impl TryFrom<isize> for Cursor {
50    type Error = StamError;
51    fn try_from(cursor: isize) -> Result<Self, Self::Error> {
52        if cursor > 0 {
53            Err(StamError::InvalidCursor(format!("{}", cursor), "Cursor is a signed integer and converts to EndAlignedCursor, expected a value <= 0. Convert from an unsigned integer for a normal BeginAlignedCursor"))
54        } else {
55            Ok(Self::EndAligned(cursor))
56        }
57    }
58}
59
60impl TryFrom<Cursor> for usize {
61    type Error = StamError;
62    fn try_from(cursor: Cursor) -> Result<Self, Self::Error> {
63        match cursor {
64            Cursor::BeginAligned(x) => Ok(x),
65            _ => Err(StamError::InvalidCursor(
66                format!("{}", cursor),
67                "Cursor is EndAligned and can't convert to usize",
68            )),
69        }
70    }
71}
72
73impl From<Cursor> for isize {
74    fn from(cursor: Cursor) -> Self {
75        match cursor {
76            Cursor::BeginAligned(x) => x as isize,
77            Cursor::EndAligned(x) => x,
78        }
79    }
80}
81
82impl TryFrom<&str> for Cursor {
83    type Error = StamError;
84    fn try_from(cursor: &str) -> Result<Self, Self::Error> {
85        if cursor.starts_with('-') {
86            //EndAligned
87            let cursor: isize = isize::from_str_radix(cursor, 10).map_err(|_e| {
88                StamError::InvalidCursor(cursor.to_owned(), "Invalid EndAlignedCursor")
89            })?;
90            Cursor::try_from(cursor)
91        } else {
92            //BeginAligned
93            let cursor: usize = usize::from_str_radix(cursor, 10).map_err(|_e| {
94                StamError::InvalidCursor(cursor.to_owned(), "Invalid BeginAlignedCursor")
95            })?;
96            Ok(Cursor::from(cursor))
97        }
98    }
99}
100
101impl std::fmt::Display for Cursor {
102    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
103        match self {
104            Self::EndAligned(0) => write!(f, "-0"), //add sign
105            Self::BeginAligned(x) => write!(f, "{}", x),
106            Self::EndAligned(x) => write!(f, "{}", x), //sign already included
107        }
108    }
109}
110
111impl Cursor {
112    /// Shift this cursor to the right (positive distance) or left (negative distance).
113    /// Will return an error if the cursor exceeds bounds, which depends on the type of cursor.
114    pub fn shift(&self, distance: isize) -> Result<Self, StamError> {
115        match self {
116            Self::BeginAligned(cursor) => {
117                if distance >= 0 {
118                    Ok(Self::BeginAligned(cursor + distance as usize))
119                } else if distance.abs() as usize <= *cursor {
120                    Ok(Self::BeginAligned(cursor - distance.abs() as usize))
121                } else {
122                    Err(StamError::CursorOutOfBounds(
123                        Cursor::BeginAligned(*cursor),
124                        "Can't shift cursor to the left, distance exceeds cursor",
125                    ))
126                }
127            }
128            Self::EndAligned(cursor) => {
129                if distance <= 0 {
130                    Ok(Self::EndAligned(cursor + distance))
131                } else if distance <= cursor.abs() {
132                    Ok(Self::EndAligned(cursor + distance))
133                } else {
134                    Err(StamError::CursorOutOfBounds(
135                        Cursor::EndAligned(*cursor),
136                        "Can't shift cursor to the right, distance exceeds cursor",
137                    ))
138                }
139            }
140        }
141    }
142}
143
144/// The handle trait is implemented for various handle types. They have in common that refer to the internal id
145/// of a [`Storable`](crate::store::Storable) item in a struct implementing [`StoreFor`](crate::store::StoreFor) by index. Types implementing this are lightweight and do not borrow anything, they can be passed and copied freely.
146// To get an actual reference to the item from a handle type, call the [`get()`](StoreFor<T>::get()) method on the store that holds it.
147/// This is a sealed trait, not implementable outside this crate.
148#[sealed(pub(crate))] //<-- this ensures nobody outside this crate can implement the trait
149pub trait Handle:
150    Clone + Copy + core::fmt::Debug + PartialEq + Eq + PartialOrd + Ord + Hash + DataSize
151{
152    /// Create a new handle for an internal ID. You shouldn't need to use this as handles will always be generated for you by higher-level functions.
153    /// In fact, creating them yourself like this should be considered dangerous!
154    fn new(intid: usize) -> Self;
155
156    /// Returns the internal index for this handle.
157    fn as_usize(&self) -> usize;
158
159    /// Low-level method to compute a new handle based on a list of gaps, used by reindexers. There is usually no reason to call this yourself.
160    fn reindex(&self, gaps: &[(Self, isize)]) -> Self {
161        let mut delta = 0;
162        for (gaphandle, gapdelta) in gaps.iter() {
163            if gaphandle.as_usize() < self.as_usize() {
164                delta += gapdelta;
165            } else {
166                break;
167            }
168        }
169        Self::new((self.as_usize() as isize + delta) as usize)
170    }
171}
172
173/// This trait provides some introspection on STAM data types. It is a sealed trait that can not be implemented.
174#[sealed(pub(crate))] //<-- this ensures nobody outside this crate can implement the trait
175pub trait TypeInfo {
176    /// Return the type (introspection).
177    fn typeinfo() -> Type;
178
179    /// Return the prefix for temporary identifiers of this type
180    fn temp_id_prefix() -> &'static str {
181        match Self::typeinfo() {
182            Type::AnnotationStore => "!Z",
183            Type::Annotation => "!A",
184            Type::AnnotationDataSet => "!S",
185            Type::AnnotationData => "!D",
186            Type::DataKey => "!K",
187            Type::DataValue => "!V",
188            Type::TextResource => "!R",
189            Type::TextSelection => "!T",
190            Type::TextSelectionSet => "!X",
191            Type::AnnotationSubStore => "!I",
192            Type::Config => "!C",
193        }
194    }
195}
196
/// An enumeration of STAM data types. This is used for introspection via [`TypeInfo`].
///
/// A variant can be parsed case-insensitively from a string via `TryFrom<&str>` (several
/// aliases are accepted per type) and is displayed as its variant name.
#[derive(Clone, Copy, PartialEq, Debug, Serialize, Deserialize)]
pub enum Type {
    /// Temporary identifiers of this type use the prefix `!Z`. Parsed from `annotationstore` or `store`.
    AnnotationStore,
    /// Temporary identifiers of this type use the prefix `!A`. Parsed from `annotation` or `annotations`.
    Annotation,
    /// Temporary identifiers of this type use the prefix `!S`. Parsed from e.g. `annotationdataset`, `dataset` or `annotationset`.
    AnnotationDataSet,
    /// Temporary identifiers of this type use the prefix `!D`. Parsed from `annotationdata` or `data`.
    AnnotationData,
    /// Temporary identifiers of this type use the prefix `!K`. Parsed from `datakey`, `datakeys`, `key` or `keys`.
    DataKey,
    /// Temporary identifiers of this type use the prefix `!V`. Parsed from `datavalue`, `value` or `values`.
    DataValue,
    /// Temporary identifiers of this type use the prefix `!R`. Parsed from `textresource`, `textresources`, `resource` or `resources`.
    TextResource,
    /// Temporary identifiers of this type use the prefix `!T`. Parsed from `textselection` or `textselections`.
    TextSelection,
    /// Temporary identifiers of this type use the prefix `!X`. Parsed from `textselectionset`.
    TextSelectionSet,
    /// Temporary identifiers of this type use the prefix `!C`. Parsed from `config` or `configuration`.
    Config,
    /// Temporary identifiers of this type use the prefix `!I`. Parsed from `annotationsubstore` or `substore`.
    AnnotationSubStore,
}
212
213impl TryFrom<&str> for Type {
214    type Error = StamError;
215    fn try_from(val: &str) -> Result<Self, Self::Error> {
216        let val_lower = val.to_lowercase();
217        match val_lower.as_str() {
218            "annotationstore" | "store" => Ok(Self::AnnotationStore),
219            "annotation" | "annotations" => Ok(Self::Annotation),
220            "annotationdataset" | "dataset" | "annotationset" | "annotationdatasets"
221            | "datasets" | "annotationsets" => Ok(Self::AnnotationDataSet),
222            "data" | "annotationdata" => Ok(Self::AnnotationData),
223            "datakey" | "datakeys" | "key" | "keys" => Ok(Self::DataKey),
224            "datavalue" | "value" | "values" => Ok(Self::DataValue),
225            "resource" | "textresource" | "resources" | "textresources" => Ok(Self::TextResource),
226            "textselection" | "textselections" => Ok(Self::TextSelection),
227            "textselectionset" => Ok(Self::TextSelectionSet),
228            "config" | "configuration" => Ok(Self::Config),
229            "annotationsubstore" | "substore" => Ok(Self::AnnotationSubStore),
230            _ => Err(StamError::OtherError("Unknown type supplied")),
231        }
232    }
233}
234
235impl Type {
236    fn as_str(&self) -> &'static str {
237        match self {
238            Self::Annotation => "Annotation",
239            Self::AnnotationData => "AnnotationData",
240            Self::AnnotationDataSet => "AnnotationDataSet",
241            Self::AnnotationStore => "AnnotationStore",
242            Self::DataKey => "DataKey",
243            Self::DataValue => "DataValue",
244            Self::TextResource => "TextResource",
245            Self::TextSelection => "TextSelection",
246            Self::TextSelectionSet => "TextSelectionSet",
247            Self::AnnotationSubStore => "AnnotationSubStore",
248            Self::Config => "Config",
249        }
250    }
251}
252
253impl std::fmt::Display for Type {
254    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
255        write!(f, "{}", self.as_str())
256    }
257}
258
/// Data formats for serialisation and deserialisation supported by the library.
///
/// The `n(..)` attributes are index numbers used for CBOR binary (de)serialisation of this
/// enum itself.
#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq, Decode, Encode)]
pub enum DataFormat {
    /// STAM JSON, see the [specification](https://github.com/annotation/stam/blob/master/README.md#stam-json)
    /// The canonical extension used by the library is `.stam.json`.
    #[n(0)]
    Json {
        /// NOTE(review): presumably selects a compact (not pretty-printed) JSON output;
        /// confirm against the serialiser. Set when parsing the `json-compact` format name.
        #[n(0)]
        compact: bool,
    },

    /// Concise Binary Object Representation, a binary format suitable for quick loading and saving, as it also
    /// holds all indices (unlike STAM JSON/CSV). This should be used for caching only and not as a data interchange
    /// storage format as the format changes per version of this library (and may even differ based on compile-time options).
    ///
    /// The canonical extension used by the library is `.stam.cbor`.
    #[n(1)]
    CBOR,

    /// STAM CSV, see the [specification](https://github.com/annotation/stam/tree/master/extensions/stam-csv)
    ///
    /// The canonical extension used by the library is `.stam.csv`.
    ///
    /// Only available when the crate is compiled with the `csv` feature.
    #[cfg(feature = "csv")]
    #[n(2)]
    Csv,
}
285
286impl std::fmt::Display for DataFormat {
287    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
288        match self {
289            Self::Json { .. } => write!(f, "json"),
290
291            Self::CBOR { .. } => write!(f, "cbor"),
292
293            #[cfg(feature = "csv")]
294            Self::Csv => write!(f, "csv"),
295        }
296    }
297}
298
299impl TryFrom<&str> for DataFormat {
300    type Error = StamError;
301    fn try_from(s: &str) -> Result<Self, Self::Error> {
302        match s {
303            "json" | "Json" | "JSON" => Ok(Self::Json { compact: false }),
304            "json-compact" | "Json-compact" | "JSON-compact" => Ok(Self::Json { compact: true }),
305            "cbor" => Ok(Self::CBOR),
306
307            #[cfg(feature = "csv")]
308            "csv" | "Csv" | "CSV" => Ok(Self::Csv),
309
310            _ => Err(StamError::OtherError("Invalid value for DataFormat")),
311        }
312    }
313}
314
315pub(crate) fn debug<F>(config: &Config, message_func: F)
316where
317    F: FnOnce() -> String,
318{
319    if config.debug {
320        eprintln!("[STAM DEBUG] {}", message_func());
321    }
322}