Skip to main content

scitadel_core/models/
annotation.rs

1//! Annotations (highlights + threaded notes) anchored to paper text.
2//!
3//! Follows the W3C Web Annotation selector pattern: a single annotation
4//! may carry multiple selectors (position, quote + context, sentence id),
5//! and the resolver tries them in order on open. Threading is self-
6//! referential via `parent_id`; replies inherit the root's anchor.
7
8use chrono::{DateTime, Utc};
9use serde::{Deserialize, Serialize};
10
11use super::{AnnotationId, PaperId, QuestionId};
12
13/// Status of the anchor after last resolve attempt.
14#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
15#[serde(rename_all = "lowercase")]
16pub enum AnchorStatus {
17    #[default]
18    /// Character-range match; the quote still lives at the same offset.
19    Ok,
20    /// The exact offsets moved but the quote (or sentence id) still matches.
21    Drifted,
22    /// None of the selectors matched — needs user re-anchoring.
23    Orphan,
24}
25
26impl AnchorStatus {
27    #[must_use]
28    pub fn as_str(self) -> &'static str {
29        match self {
30            Self::Ok => "ok",
31            Self::Drifted => "drifted",
32            Self::Orphan => "orphan",
33        }
34    }
35
36    #[must_use]
37    pub fn parse(s: &str) -> Option<Self> {
38        match s {
39            "ok" => Some(Self::Ok),
40            "drifted" => Some(Self::Drifted),
41            "orphan" => Some(Self::Orphan),
42            _ => None,
43        }
44    }
45}
46
47/// Multi-selector anchor. Any field may be `None`; the resolver falls
48/// through: position → quote + context → sentence id → orphan.
49#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
50pub struct Anchor {
51    /// TextPositionSelector: fast, fragile. `(start, end)` in chars.
52    pub char_range: Option<(usize, usize)>,
53    /// TextQuoteSelector body.
54    pub quote: Option<String>,
55    /// Context before the quote — used for disambiguation.
56    pub prefix: Option<String>,
57    /// Context after the quote.
58    pub suffix: Option<String>,
59    /// SHA1 of the normalized sentence containing the quote.
60    pub sentence_id: Option<String>,
61    /// Which paper-text extraction version this was anchored against.
62    pub source_version: Option<String>,
63    /// Last-known resolution status; updated on open.
64    pub status: AnchorStatus,
65}
66
67impl Anchor {
68    /// Is this an orphan that requires user re-anchoring?
69    #[must_use]
70    pub fn is_orphan(&self) -> bool {
71        matches!(self.status, AnchorStatus::Orphan)
72    }
73}
74
75/// One annotation. May be a root (with an anchor) or a reply (parent_id set,
76/// anchor empty; the root's anchor is the canonical one for rendering).
77#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
78pub struct Annotation {
79    pub id: AnnotationId,
80    /// `None` = root (carries the anchor). `Some` = reply to that ID.
81    pub parent_id: Option<AnnotationId>,
82    pub paper_id: PaperId,
83    pub question_id: Option<QuestionId>,
84    pub anchor: Anchor,
85    pub note: String,
86    pub color: Option<String>,
87    pub tags: Vec<String>,
88    /// Identity string — `$USER` for TUI writes, required for MCP writes.
89    pub author: String,
90    pub created_at: DateTime<Utc>,
91    pub updated_at: DateTime<Utc>,
92    /// Soft-delete tombstone. None = live.
93    pub deleted_at: Option<DateTime<Utc>>,
94}
95
96impl Annotation {
97    /// Build a new root-level annotation with the given anchor.
98    #[must_use]
99    pub fn new_root(paper_id: PaperId, author: String, note: String, anchor: Anchor) -> Self {
100        let now = Utc::now();
101        Self {
102            id: AnnotationId::new(),
103            parent_id: None,
104            paper_id,
105            question_id: None,
106            anchor,
107            note,
108            color: None,
109            tags: Vec::new(),
110            author,
111            created_at: now,
112            updated_at: now,
113            deleted_at: None,
114        }
115    }
116
117    /// Build a new reply whose anchor is empty (inherits from root).
118    #[must_use]
119    pub fn new_reply(parent: &Annotation, author: String, note: String) -> Self {
120        let now = Utc::now();
121        Self {
122            id: AnnotationId::new(),
123            parent_id: Some(parent.id.clone()),
124            paper_id: parent.paper_id.clone(),
125            question_id: parent.question_id.clone(),
126            anchor: Anchor::default(),
127            note,
128            color: None,
129            tags: Vec::new(),
130            author,
131            created_at: now,
132            updated_at: now,
133            deleted_at: None,
134        }
135    }
136
137    /// True if this is a reply to another annotation.
138    #[must_use]
139    pub fn is_reply(&self) -> bool {
140        self.parent_id.is_some()
141    }
142
143    /// True if this annotation has been soft-deleted.
144    #[must_use]
145    pub fn is_deleted(&self) -> bool {
146        self.deleted_at.is_some()
147    }
148}
149
150/// Per-reader read receipt; composite key (annotation_id, reader).
151#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
152pub struct AnnotationRead {
153    pub annotation_id: AnnotationId,
154    pub reader: String,
155    pub seen_at: DateTime<Utc>,
156}
157
158/// Normalize a sentence for sentence-id hashing.
159///
160/// Per ADR-004: NFKC compose (folds ligatures: fi → fi, fl → fl), Unicode
161/// lowercase, then collapse all Unicode whitespace runs to a single
162/// ASCII space and trim. Two sentences that differ only in case,
163/// whitespace, or ligature presentation hash to the same value.
164#[must_use]
165pub fn normalize_sentence(s: &str) -> String {
166    use unicode_normalization::UnicodeNormalization;
167    // NFKC: compatibility decomposition + canonical composition.
168    let composed: String = s.nfkc().collect();
169    let lowered: String = composed.chars().flat_map(char::to_lowercase).collect();
170    let mut out = String::with_capacity(lowered.len());
171    let mut prev_was_space = true; // collapses leading whitespace
172    for ch in lowered.chars() {
173        if ch.is_whitespace() {
174            if !prev_was_space {
175                out.push(' ');
176                prev_was_space = true;
177            }
178        } else {
179            out.push(ch);
180            prev_was_space = false;
181        }
182    }
183    if out.ends_with(' ') {
184        out.pop();
185    }
186    out
187}
188
189/// SHA1 hex of the normalized sentence — stable identifier the
190/// resolver can compare against sentences extracted from current
191/// paper text. See `normalize_sentence` and ADR-004 for the
192/// normalization spec.
193#[must_use]
194pub fn sentence_id(s: &str) -> String {
195    use sha1::{Digest, Sha1};
196    let normalized = normalize_sentence(s);
197    let mut hasher = Sha1::new();
198    hasher.update(normalized.as_bytes());
199    let digest = hasher.finalize();
200    let mut hex = String::with_capacity(40);
201    for byte in digest {
202        use std::fmt::Write as _;
203        let _ = write!(hex, "{byte:02x}");
204    }
205    hex
206}
207
#[cfg(test)]
mod tests {
    use super::*;

    /// A reply copies the parent's paper and question, links back to the
    /// parent, and carries no anchor of its own.
    #[test]
    fn reply_inherits_paper_and_question() {
        let paper_id: PaperId = "p1".into();
        let root = Annotation::new_root(
            paper_id.clone(),
            "lars".into(),
            "interesting passage".into(),
            Anchor {
                quote: Some("neutron energy".into()),
                ..Anchor::default()
            },
        );
        let reply = Annotation::new_reply(&root, "claude".into(), "agreed; see 4.2".into());
        assert_eq!(reply.paper_id, paper_id);
        // The test name promises question inheritance, so check it explicitly.
        assert_eq!(reply.question_id, root.question_id);
        assert_eq!(reply.parent_id.as_ref(), Some(&root.id));
        assert!(!root.is_reply());
        assert!(reply.is_reply());
        assert!(!root.is_deleted());
        assert!(!reply.is_deleted());
        assert!(reply.anchor.quote.is_none(), "replies inherit anchor");
    }

    /// `parse` undoes `as_str` for every variant.
    #[test]
    fn anchor_status_round_trip() {
        for s in [
            AnchorStatus::Ok,
            AnchorStatus::Drifted,
            AnchorStatus::Orphan,
        ] {
            assert_eq!(AnchorStatus::parse(s.as_str()), Some(s));
        }
    }

    /// Anything other than the three exact lowercase names is rejected.
    #[test]
    fn anchor_status_parse_rejects_unknown() {
        assert_eq!(AnchorStatus::parse(""), None);
        assert_eq!(AnchorStatus::parse("OK"), None);
        assert_eq!(AnchorStatus::parse("missing"), None);
    }

    #[test]
    fn orphan_flag() {
        let mut a = Anchor::default();
        assert!(!a.is_orphan());
        a.status = AnchorStatus::Orphan;
        assert!(a.is_orphan());
    }

    #[test]
    fn normalize_collapses_whitespace_and_lowercases() {
        assert_eq!(normalize_sentence("  Hello   WORLD\n"), "hello world");
    }

    /// Empty and whitespace-only input normalize to the empty string.
    #[test]
    fn normalize_blank_input_is_empty() {
        assert_eq!(normalize_sentence(""), "");
        assert_eq!(normalize_sentence(" \t\n"), "");
    }

    #[test]
    fn normalize_folds_ligatures_via_nfkc() {
        // U+FB01 (LATIN SMALL LIGATURE FI) becomes "fi" under NFKC, so
        // "ef" + U+FB01 + "cient" normalizes to "efficient".
        assert_eq!(normalize_sentence("ef\u{FB01}cient"), "efficient");
    }

    #[test]
    fn sentence_id_is_stable_under_whitespace_and_case() {
        let a = sentence_id("Hello   World");
        let b = sentence_id("hello world");
        let c = sentence_id("HELLO\tWORLD");
        assert_eq!(a, b);
        assert_eq!(b, c);
        // Length of SHA1 hex.
        assert_eq!(a.len(), 40);
    }

    #[test]
    fn sentence_id_changes_when_content_does() {
        assert_ne!(sentence_id("hello world"), sentence_id("hello mars"));
    }
}