stam/
text.rs

1/*
2    STAM Library (Stand-off Text Annotation Model)
3        by Maarten van Gompel <proycon@anaproy.nl>
4        Digital Infrastructure, KNAW Humanities Cluster
5
6        Licensed under the GNU General Public License v3
7
8        https://github.com/annotation/stam-rust
9*/
10
11//! This module defines and partially implements the [`Text`] trait.
12
13use crate::error::StamError;
14use crate::selector::Offset;
15use crate::types::*;
16
17/// This trait provides methods that operate on structures that hold or represent text content.
18/// They are fairly low-level methods but are exposed in the public API. The [`FindText`](crate::FindText)
19/// trait subsequently builds upon this one with high-level search methods.
20pub trait Text<'store, 'slf>
21where
22    'store: 'slf,
23{
24    /// Returns a reference to the text
25    fn text(&'slf self) -> &'store str;
26
27    /// Returns the length of the text in unicode points
28    /// For bytes, use `Self::text().len()` instead.
29    fn textlen(&'slf self) -> usize;
30
31    /// Returns a string reference to a slice of text as specified by the offset
32    fn text_by_offset(&'slf self, offset: &Offset) -> Result<&'store str, StamError>;
33
34    /// Finds the utf-8 byte position where the specified text subslice begins
35    /// The returned offset is relative to the TextSelection
36    fn subslice_utf8_offset(&'slf self, subslice: &str) -> Option<usize> {
37        let self_begin = self.text().as_ptr() as usize;
38        let sub_begin = subslice.as_ptr() as usize;
39        if sub_begin < self_begin || sub_begin > self_begin.wrapping_add(self.text().len()) {
40            None
41        } else {
42            Some(sub_begin.wrapping_sub(self_begin))
43        }
44    }
45
46    fn is_empty(&'slf self) -> bool {
47        self.text().is_empty()
48    }
49
50    /// Converts a unicode character position to a UTF-8 byte position
51    fn utf8byte(&'slf self, abscursor: usize) -> Result<usize, StamError>;
52
53    /// Converts a UTF-8 byte position into a unicode position
54    fn utf8byte_to_charpos(&'slf self, bytecursor: usize) -> Result<usize, StamError>;
55
56    /// Resolves a begin-aligned cursor to an absolute cursor (i.e. relative to the TextResource).
57    fn absolute_cursor(&'slf self, cursor: usize) -> usize;
58
59    /// Resolves a relative offset (relative to another TextSelection) to an absolute one (in terms of to the underlying TextResource)
60    fn absolute_offset(&'slf self, offset: &Offset) -> Result<Offset, StamError> {
61        Ok(Offset::simple(
62            self.absolute_cursor(self.beginaligned_cursor(&offset.begin)?),
63            self.absolute_cursor(self.beginaligned_cursor(&offset.end)?),
64        ))
65    }
66
67    /// Resolves a cursor to a begin aligned cursor, resolving all relative end-aligned positions
68    fn beginaligned_cursor(&'slf self, cursor: &Cursor) -> Result<usize, StamError> {
69        match *cursor {
70            Cursor::BeginAligned(cursor) => Ok(cursor),
71            Cursor::EndAligned(cursor) => {
72                if cursor.abs() as usize > self.textlen() {
73                    Err(StamError::CursorOutOfBounds(
74                        Cursor::EndAligned(cursor),
75                        "TextResource::beginaligned_cursor(): end aligned cursor ends up before the beginning",
76                    ))
77                } else {
78                    Ok(self.textlen() - cursor.abs() as usize)
79                }
80            }
81        }
82    }
83}