stam/text.rs
1/*
2 STAM Library (Stand-off Text Annotation Model)
3 by Maarten van Gompel <proycon@anaproy.nl>
4 Digital Infrastructure, KNAW Humanities Cluster
5
6 Licensed under the GNU General Public License v3
7
8 https://github.com/annotation/stam-rust
9*/
10
11//! This module defines and partially implements the [`Text`] trait.
12
13use crate::error::StamError;
14use crate::selector::Offset;
15use crate::types::*;
16
17/// This trait provides methods that operate on structures that hold or represent text content.
18/// They are fairly low-level methods but are exposed in the public API. The [`FindText`](crate::FindText)
19/// trait subsequently builds upon this one with high-level search methods.
20pub trait Text<'store, 'slf>
21where
22 'store: 'slf,
23{
24 /// Returns a reference to the text
25 fn text(&'slf self) -> &'store str;
26
27 /// Returns the length of the text in unicode points
28 /// For bytes, use `Self::text().len()` instead.
29 fn textlen(&'slf self) -> usize;
30
31 /// Returns a string reference to a slice of text as specified by the offset
32 fn text_by_offset(&'slf self, offset: &Offset) -> Result<&'store str, StamError>;
33
34 /// Finds the utf-8 byte position where the specified text subslice begins
35 /// The returned offset is relative to the TextSelection
36 fn subslice_utf8_offset(&'slf self, subslice: &str) -> Option<usize> {
37 let self_begin = self.text().as_ptr() as usize;
38 let sub_begin = subslice.as_ptr() as usize;
39 if sub_begin < self_begin || sub_begin > self_begin.wrapping_add(self.text().len()) {
40 None
41 } else {
42 Some(sub_begin.wrapping_sub(self_begin))
43 }
44 }
45
46 fn is_empty(&'slf self) -> bool {
47 self.text().is_empty()
48 }
49
50 /// Converts a unicode character position to a UTF-8 byte position
51 fn utf8byte(&'slf self, abscursor: usize) -> Result<usize, StamError>;
52
53 /// Converts a UTF-8 byte position into a unicode position
54 fn utf8byte_to_charpos(&'slf self, bytecursor: usize) -> Result<usize, StamError>;
55
56 /// Resolves a begin-aligned cursor to an absolute cursor (i.e. relative to the TextResource).
57 fn absolute_cursor(&'slf self, cursor: usize) -> usize;
58
59 /// Resolves a relative offset (relative to another TextSelection) to an absolute one (in terms of to the underlying TextResource)
60 fn absolute_offset(&'slf self, offset: &Offset) -> Result<Offset, StamError> {
61 Ok(Offset::simple(
62 self.absolute_cursor(self.beginaligned_cursor(&offset.begin)?),
63 self.absolute_cursor(self.beginaligned_cursor(&offset.end)?),
64 ))
65 }
66
67 /// Resolves a cursor to a begin aligned cursor, resolving all relative end-aligned positions
68 fn beginaligned_cursor(&'slf self, cursor: &Cursor) -> Result<usize, StamError> {
69 match *cursor {
70 Cursor::BeginAligned(cursor) => Ok(cursor),
71 Cursor::EndAligned(cursor) => {
72 if cursor.abs() as usize > self.textlen() {
73 Err(StamError::CursorOutOfBounds(
74 Cursor::EndAligned(cursor),
75 "TextResource::beginaligned_cursor(): end aligned cursor ends up before the beginning",
76 ))
77 } else {
78 Ok(self.textlen() - cursor.abs() as usize)
79 }
80 }
81 }
82 }
83}