Skip to main content

scitadel_db/sqlite/
annotations.rs

1//! SQLite-backed repository for annotations (#49 iter 2, #96 resolver).
2//!
3//! Covers CRUD, threaded reply loading, and the four-step W3C-style
4//! anchor resolver:
5//!
6//! 1. position (`char_range` + bounds-check)
7//! 2. quote with prefix/suffix context disambiguation
8//! 3. fuzzy quote match (Jaro-Winkler over a sliding window)
9//! 4. sentence-id (SHA1 of normalized sentence; see ADR-004)
10//!
11//! Failure of all four selectors yields `AnchorStatus::Orphan`.
12
13use chrono::{DateTime, Utc};
14use rusqlite::{OptionalExtension, params};
15use scitadel_core::models::{Anchor, AnchorStatus, Annotation, AnnotationId, PaperId, QuestionId};
16
17use crate::error::DbError;
18use crate::sqlite::Database;
19
20#[derive(Clone)]
21pub struct SqliteAnnotationRepository {
22    db: Database,
23}
24
25impl SqliteAnnotationRepository {
26    pub fn new(db: Database) -> Self {
27        Self { db }
28    }
29
30    /// Insert a new annotation. Caller is responsible for building the
31    /// `Annotation` (see `Annotation::new_root` / `new_reply`).
32    pub fn create(&self, annotation: &Annotation) -> Result<(), DbError> {
33        let conn = self.db.conn()?;
34        conn.execute(
35            "INSERT INTO annotations
36                (id, parent_id, paper_id, question_id,
37                 char_start, char_end, quote, prefix, suffix,
38                 sentence_id, source_version, anchor_status,
39                 note, color, tags_json, author,
40                 created_at, updated_at, deleted_at)
41             VALUES (?1, ?2, ?3, ?4,
42                     ?5, ?6, ?7, ?8, ?9,
43                     ?10, ?11, ?12,
44                     ?13, ?14, ?15, ?16,
45                     ?17, ?18, ?19)",
46            params![
47                annotation.id.as_str(),
48                annotation.parent_id.as_ref().map(AnnotationId::as_str),
49                annotation.paper_id.as_str(),
50                annotation.question_id.as_ref().map(QuestionId::as_str),
51                annotation.anchor.char_range.map(|(s, _)| s as i64),
52                annotation.anchor.char_range.map(|(_, e)| e as i64),
53                annotation.anchor.quote,
54                annotation.anchor.prefix,
55                annotation.anchor.suffix,
56                annotation.anchor.sentence_id,
57                annotation.anchor.source_version,
58                annotation.anchor.status.as_str(),
59                annotation.note,
60                annotation.color,
61                serde_json::to_string(&annotation.tags).unwrap_or_else(|_| "[]".into()),
62                annotation.author,
63                annotation.created_at.to_rfc3339(),
64                annotation.updated_at.to_rfc3339(),
65                annotation.deleted_at.map(|d| d.to_rfc3339()),
66            ],
67        )?;
68        Ok(())
69    }
70
71    /// Fetch an annotation by ID (live rows only).
72    pub fn get(&self, id: &str) -> Result<Option<Annotation>, DbError> {
73        let conn = self.db.conn()?;
74        let mut stmt =
75            conn.prepare("SELECT * FROM annotations WHERE id = ?1 AND deleted_at IS NULL")?;
76        let out = stmt.query_row(params![id], row_to_annotation).optional()?;
77        Ok(out)
78    }
79
80    /// All live annotations anchored to a paper (roots + replies).
81    pub fn list_by_paper(&self, paper_id: &str) -> Result<Vec<Annotation>, DbError> {
82        let conn = self.db.conn()?;
83        let mut stmt = conn.prepare(
84            "SELECT * FROM annotations
85             WHERE paper_id = ?1 AND deleted_at IS NULL
86             ORDER BY created_at ASC",
87        )?;
88        let rows = stmt.query_map(params![paper_id], row_to_annotation)?;
89        Ok(rows.filter_map(Result::ok).collect())
90    }
91
92    /// All live replies to a specific root annotation, ordered oldest-first.
93    pub fn list_replies(&self, parent_id: &str) -> Result<Vec<Annotation>, DbError> {
94        let conn = self.db.conn()?;
95        let mut stmt = conn.prepare(
96            "SELECT * FROM annotations
97             WHERE parent_id = ?1 AND deleted_at IS NULL
98             ORDER BY created_at ASC",
99        )?;
100        let rows = stmt.query_map(params![parent_id], row_to_annotation)?;
101        Ok(rows.filter_map(Result::ok).collect())
102    }
103
104    /// Update mutable fields (note / color / tags). Anchor is updated
105    /// separately via `update_anchor` since it has its own lifecycle.
106    pub fn update_note(
107        &self,
108        id: &str,
109        note: &str,
110        color: Option<&str>,
111        tags: &[String],
112    ) -> Result<(), DbError> {
113        let conn = self.db.conn()?;
114        conn.execute(
115            "UPDATE annotations
116             SET note = ?1, color = ?2, tags_json = ?3, updated_at = ?4
117             WHERE id = ?5",
118            params![
119                note,
120                color,
121                serde_json::to_string(tags).unwrap_or_else(|_| "[]".into()),
122                Utc::now().to_rfc3339(),
123                id,
124            ],
125        )?;
126        Ok(())
127    }
128
129    /// Persist the resolver's updated anchor state. Called after
130    /// `resolve_anchor` runs on paper-open.
131    pub fn update_anchor(&self, id: &str, anchor: &Anchor) -> Result<(), DbError> {
132        let conn = self.db.conn()?;
133        conn.execute(
134            "UPDATE annotations
135             SET char_start = ?1, char_end = ?2,
136                 anchor_status = ?3, updated_at = ?4
137             WHERE id = ?5",
138            params![
139                anchor.char_range.map(|(s, _)| s as i64),
140                anchor.char_range.map(|(_, e)| e as i64),
141                anchor.status.as_str(),
142                Utc::now().to_rfc3339(),
143                id,
144            ],
145        )?;
146        Ok(())
147    }
148
149    /// Soft-delete — tombstones the row so replies still point at
150    /// something, and `list_*` queries skip it.
151    pub fn soft_delete(&self, id: &str) -> Result<(), DbError> {
152        let conn = self.db.conn()?;
153        conn.execute(
154            "UPDATE annotations SET deleted_at = ?1 WHERE id = ?2",
155            params![Utc::now().to_rfc3339(), id],
156        )?;
157        Ok(())
158    }
159
160    /// Record that `reader` has seen the current state of each annotation.
161    /// Upserts so repeat calls bump `seen_at`.
162    pub fn mark_seen(&self, annotation_ids: &[&str], reader: &str) -> Result<(), DbError> {
163        if annotation_ids.is_empty() {
164            return Ok(());
165        }
166        let mut conn = self.db.conn()?;
167        let tx = conn.transaction()?;
168        let now = Utc::now().to_rfc3339();
169        for id in annotation_ids {
170            tx.execute(
171                "INSERT INTO annotation_reads (annotation_id, reader, seen_at)
172                 VALUES (?1, ?2, ?3)
173                 ON CONFLICT(annotation_id, reader) DO UPDATE SET seen_at = excluded.seen_at",
174                params![id, reader, now],
175            )?;
176        }
177        tx.commit()?;
178        Ok(())
179    }
180
181    /// Mark the thread rooted at `root_id` (root + all live replies) as
182    /// seen by `reader`.
183    pub fn mark_thread_seen(&self, root_id: &str, reader: &str) -> Result<(), DbError> {
184        let replies = self.list_replies(root_id)?;
185        let mut ids: Vec<&str> = replies.iter().map(|a| a.id.as_str()).collect();
186        ids.push(root_id);
187        self.mark_seen(&ids, reader)
188    }
189
190    /// Annotations the `reader` hasn't seen since the last modification.
191    /// Optional `paper_id` scopes the query. Uses a LEFT JOIN so rows
192    /// with no receipt count as unread; rows whose `seen_at` is older
193    /// than `updated_at` also count (the annotation changed since last
194    /// view).
195    pub fn list_unread(
196        &self,
197        reader: &str,
198        paper_id: Option<&str>,
199    ) -> Result<Vec<Annotation>, DbError> {
200        let conn = self.db.conn()?;
201        let (sql, rows) = if let Some(pid) = paper_id {
202            let mut stmt = conn.prepare(
203                "SELECT a.* FROM annotations a
204                 LEFT JOIN annotation_reads r
205                   ON r.annotation_id = a.id AND r.reader = ?1
206                 WHERE a.paper_id = ?2
207                   AND a.deleted_at IS NULL
208                   AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)
209                 ORDER BY a.created_at ASC",
210            )?;
211            let rows = stmt
212                .query_map(params![reader, pid], row_to_annotation)?
213                .filter_map(Result::ok)
214                .collect::<Vec<_>>();
215            ("scoped", rows)
216        } else {
217            let mut stmt = conn.prepare(
218                "SELECT a.* FROM annotations a
219                 LEFT JOIN annotation_reads r
220                   ON r.annotation_id = a.id AND r.reader = ?1
221                 WHERE a.deleted_at IS NULL
222                   AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)
223                 ORDER BY a.created_at ASC",
224            )?;
225            let rows = stmt
226                .query_map(params![reader], row_to_annotation)?
227                .filter_map(Result::ok)
228                .collect::<Vec<_>>();
229            ("all", rows)
230        };
231        let _ = sql; // kept for potential future logging
232        Ok(rows)
233    }
234}
235
236fn row_to_annotation(row: &rusqlite::Row) -> rusqlite::Result<Annotation> {
237    let char_start: Option<i64> = row.get("char_start")?;
238    let char_end: Option<i64> = row.get("char_end")?;
239    let char_range = match (char_start, char_end) {
240        (Some(s), Some(e)) => Some((s as usize, e as usize)),
241        _ => None,
242    };
243    let anchor_status_str: Option<String> = row.get("anchor_status")?;
244    let anchor = Anchor {
245        char_range,
246        quote: row.get("quote")?,
247        prefix: row.get("prefix")?,
248        suffix: row.get("suffix")?,
249        sentence_id: row.get("sentence_id")?,
250        source_version: row.get("source_version")?,
251        status: anchor_status_str
252            .as_deref()
253            .and_then(AnchorStatus::parse)
254            .unwrap_or_default(),
255    };
256
257    let tags_json: String = row.get("tags_json")?;
258    let tags: Vec<String> = serde_json::from_str(&tags_json).unwrap_or_default();
259
260    let parent_id: Option<String> = row.get("parent_id")?;
261    let question_id: Option<String> = row.get("question_id")?;
262    let created_at: String = row.get("created_at")?;
263    let updated_at: String = row.get("updated_at")?;
264    let deleted_at: Option<String> = row.get("deleted_at")?;
265
266    Ok(Annotation {
267        id: AnnotationId::from(row.get::<_, String>("id")?),
268        parent_id: parent_id.map(AnnotationId::from),
269        paper_id: PaperId::from(row.get::<_, String>("paper_id")?),
270        question_id: question_id.map(QuestionId::from),
271        anchor,
272        note: row.get("note")?,
273        color: row.get("color")?,
274        tags,
275        author: row.get("author")?,
276        created_at: parse_dt(&created_at),
277        updated_at: parse_dt(&updated_at),
278        deleted_at: deleted_at.as_deref().map(parse_dt),
279    })
280}
281
282fn parse_dt(s: &str) -> DateTime<Utc> {
283    DateTime::parse_from_rfc3339(s).map_or_else(|_| Utc::now(), |dt| dt.with_timezone(&Utc))
284}
285
/// Similarity cut-off used by `resolve_anchor` for the fuzzy-quote step.
///
/// Jaro-Winkler scores lie in `[0, 1]`; a window scoring at or above this
/// value is accepted and the anchor is marked `Drifted`. Pass a different
/// value to `resolve_anchor_with_threshold` to tune the behaviour.
pub const FUZZY_THRESHOLD: f64 = 0.9;
290
291/// Resolve an anchor against current paper text, updating `status` and
292/// (if the quote shifted) `char_range` in place. Four-step W3C-style
293/// pipeline (#96):
294///
295/// 1. **Position**: `char_range` still hits the same `quote` → `Ok`.
296///    Bounds-checked; out-of-range offsets fall through, never panic.
297/// 2. **Quote + prefix/suffix context**: every occurrence of `quote`
298///    in `text` is scored by how well its surroundings match the
299///    stored `prefix` / `suffix`; the best-scoring occurrence wins.
300///    With a single occurrence and no context, behaves like a plain
301///    substring search → `Drifted`.
302/// 3. **Fuzzy quote match**: sliding window the size of `quote` over
303///    `text`; Jaro-Winkler ≥ `FUZZY_THRESHOLD` → `Drifted`. Catches
304///    one-word publisher edits that would otherwise orphan.
305/// 4. **Sentence-id**: split text into sentences, hash each via
306///    `sentence_id()`, and re-anchor on a match. Survives quote
307///    rewrites that preserve the surrounding sentence.
308///
309/// Returns `Orphan` only when all four selectors fail.
310pub fn resolve_anchor(anchor: &mut Anchor, text: &str) -> AnchorStatus {
311    resolve_anchor_with_threshold(anchor, text, FUZZY_THRESHOLD)
312}
313
314pub fn resolve_anchor_with_threshold(
315    anchor: &mut Anchor,
316    text: &str,
317    fuzzy_threshold: f64,
318) -> AnchorStatus {
319    // Step 1: position selector — bounds-checked.
320    if let (Some((start, end)), Some(quote)) = (anchor.char_range, anchor.quote.as_ref())
321        && let Some(slice) = char_slice(text, start, end)
322        && &slice == quote
323    {
324        anchor.status = AnchorStatus::Ok;
325        return AnchorStatus::Ok;
326    }
327
328    // Step 2: quote with prefix/suffix disambiguation.
329    if let Some(quote) = anchor.quote.as_ref()
330        && let Some((sc, ec)) = find_with_context(
331            text,
332            quote,
333            anchor.prefix.as_deref(),
334            anchor.suffix.as_deref(),
335        )
336    {
337        anchor.char_range = Some((sc, ec));
338        anchor.status = AnchorStatus::Drifted;
339        return AnchorStatus::Drifted;
340    }
341
342    // Step 3: fuzzy quote match (sliding window).
343    if let Some(quote) = anchor.quote.as_ref()
344        && let Some((sc, ec)) = fuzzy_find(text, quote, fuzzy_threshold)
345    {
346        anchor.char_range = Some((sc, ec));
347        anchor.status = AnchorStatus::Drifted;
348        return AnchorStatus::Drifted;
349    }
350
351    // Step 4: sentence-id fallback.
352    if let Some(sid) = anchor.sentence_id.as_ref()
353        && let Some((sc, ec)) = find_sentence_by_id(text, sid)
354    {
355        anchor.char_range = Some((sc, ec));
356        anchor.status = AnchorStatus::Drifted;
357        return AnchorStatus::Drifted;
358    }
359
360    anchor.status = AnchorStatus::Orphan;
361    AnchorStatus::Orphan
362}
363
/// Slice `text` by char positions (`start` inclusive, `end` exclusive).
///
/// Returns `None` if the range is malformed (`start > end`) or reaches
/// beyond the text — including a zero-width range whose `start` lies past
/// the end of the text. A zero-width range at or before the end yields
/// `Some("")`. Never panics, which avoids the crash the old resolver hit
/// on out-of-bounds rows (#96 gap 4).
fn char_slice(text: &str, start: usize, end: usize) -> Option<String> {
    if end < start {
        return None;
    }
    let mut chars = text.chars();
    if start > 0 {
        // `nth(start - 1)` consumes exactly `start` chars and yields `None`
        // when the text is shorter than that.
        chars.nth(start - 1)?;
    }
    let mut slice = String::new();
    for _ in start..end {
        // Running out of chars before `end` means the range overshoots.
        slice.push(chars.next()?);
    }
    Some(slice)
}
379
/// Find the `(start_char, end_char)` range of every non-overlapping
/// occurrence of `quote` in `text`, scanning left to right.
///
/// Ranges are in char positions, not bytes, so they stay valid for text
/// with multibyte characters. An empty `quote` yields no occurrences.
/// The char position is tracked incrementally, so the text is walked once
/// no matter how many hits there are.
fn find_all(text: &str, quote: &str) -> Vec<(usize, usize)> {
    if quote.is_empty() {
        return Vec::new();
    }
    let quote_chars = quote.chars().count();
    let mut hits = Vec::new();
    // Byte and char offsets of the point the search resumes from; both
    // always refer to the same position in `text`.
    let mut byte_cursor = 0;
    let mut char_cursor = 0;
    while let Some(rel) = text[byte_cursor..].find(quote) {
        let hit_byte = byte_cursor + rel;
        // Only count the chars skipped since the previous hit.
        char_cursor += text[byte_cursor..hit_byte].chars().count();
        hits.push((char_cursor, char_cursor + quote_chars));
        // Resume after the match: occurrences never overlap.
        byte_cursor = hit_byte + quote.len();
        char_cursor += quote_chars;
    }
    hits
}
397
398/// Pick the occurrence whose surrounding context best matches the
399/// stored `prefix` / `suffix`. With a single hit and no context, it's
400/// a plain substring lookup; with multiple hits, the prefix-suffix
401/// score breaks the tie.
402fn find_with_context(
403    text: &str,
404    quote: &str,
405    prefix: Option<&str>,
406    suffix: Option<&str>,
407) -> Option<(usize, usize)> {
408    let occurrences = find_all(text, quote);
409    if occurrences.is_empty() {
410        return None;
411    }
412    if occurrences.len() == 1 || (prefix.is_none() && suffix.is_none()) {
413        return Some(occurrences[0]);
414    }
415
416    let chars: Vec<char> = text.chars().collect();
417    occurrences
418        .into_iter()
419        .max_by_key(|&(sc, ec)| context_score(&chars, sc, ec, prefix, suffix))
420}
421
/// Score a candidate's surroundings against the stored prefix/suffix.
///
/// `chars` is the full text as chars and `start..end` the candidate's
/// char range. The score is the number of prefix chars that match walking
/// leftwards from `start` plus the number of suffix chars that match
/// walking rightwards from `end`; each side stops at its first mismatch,
/// so the chars adjacent to the match carry the most weight.
///
/// A `start` or `end` that lies outside `chars` contributes 0 for that
/// side instead of panicking.
fn context_score(
    chars: &[char],
    start: usize,
    end: usize,
    prefix: Option<&str>,
    suffix: Option<&str>,
) -> i64 {
    // Checked sub-slices: an out-of-range offset means "no context here".
    let before = chars.get(..start).unwrap_or(&[]);
    let after = chars.get(end..).unwrap_or(&[]);

    let prefix_hits = prefix.map_or(0, |p| {
        before
            .iter()
            .rev()
            .zip(p.chars().rev())
            .take_while(|(have, want)| **have == *want)
            .count()
    });
    let suffix_hits = suffix.map_or(0, |s| {
        after
            .iter()
            .zip(s.chars())
            .take_while(|(have, want)| **have == *want)
            .count()
    });

    // Both counts are bounded by the text length, so this cannot wrap.
    (prefix_hits + suffix_hits) as i64
}
458
459/// Sliding-window fuzzy match. Walks character-aligned windows the
460/// size of `quote` and returns the highest-scoring window that meets
461/// `threshold` (Jaro-Winkler in [0,1]).
462fn fuzzy_find(text: &str, quote: &str, threshold: f64) -> Option<(usize, usize)> {
463    if quote.is_empty() {
464        return None;
465    }
466    let chars: Vec<char> = text.chars().collect();
467    let qlen = quote.chars().count();
468    if chars.len() < qlen {
469        return None;
470    }
471
472    let mut best: Option<(usize, f64)> = None;
473    for start in 0..=chars.len() - qlen {
474        let window: String = chars[start..start + qlen].iter().collect();
475        let score = strsim::jaro_winkler(&window, quote);
476        if score >= threshold && best.is_none_or(|(_, b)| score > b) {
477            best = Some((start, score));
478        }
479    }
480    best.map(|(start, _)| (start, start + qlen))
481}
482
483/// Find the sentence in `text` whose `sentence_id` matches `sid`.
484/// Sentence boundaries are simple terminator-based (`. ! ?`) — good
485/// enough for paper bodies and abstracts; ADR-004 calls out that
486/// proper ICU sentence segmentation is a follow-up.
487fn find_sentence_by_id(text: &str, sid: &str) -> Option<(usize, usize)> {
488    let chars: Vec<char> = text.chars().collect();
489    let mut sentence_start_char = 0;
490    let mut i = 0;
491    while i < chars.len() {
492        let ch = chars[i];
493        let is_terminator = matches!(ch, '.' | '!' | '?');
494        let is_end = i + 1 == chars.len();
495        if is_terminator || is_end {
496            let end = if is_end { chars.len() } else { i + 1 };
497            let sentence: String = chars[sentence_start_char..end].iter().collect();
498            let trimmed = sentence.trim();
499            if !trimmed.is_empty() && scitadel_core::models::sentence_id(trimmed) == sid {
500                // Map back to the trimmed sentence's char range inside `text`.
501                let leading_ws = sentence.chars().take_while(|c| c.is_whitespace()).count();
502                let trailing_ws = sentence
503                    .chars()
504                    .rev()
505                    .take_while(|c| c.is_whitespace())
506                    .count();
507                let trimmed_start = sentence_start_char + leading_ws;
508                let trimmed_end = end - trailing_ws;
509                if trimmed_end > trimmed_start {
510                    return Some((trimmed_start, trimmed_end));
511                }
512            }
513            // Advance past the terminator into the next sentence.
514            sentence_start_char = end;
515        }
516        i += 1;
517    }
518    None
519}
520
#[cfg(test)]
mod tests {
    use super::*;
    use scitadel_core::models::Annotation;

    /// In-memory database with migrations applied and a single paper `p1`
    /// for annotations to hang off.
    fn fresh_db_with_paper() -> Database {
        let db = Database::open_in_memory().unwrap();
        db.migrate().unwrap();
        let conn = db.conn().unwrap();
        conn.execute(
            "INSERT INTO papers (id, title, created_at, updated_at)
             VALUES ('p1', 't', datetime('now'), datetime('now'))",
            [],
        )
        .unwrap();
        db
    }

    /// Root annotation on paper `p1` by author `lars`.
    fn sample_root() -> Annotation {
        Annotation::new_root(
            PaperId::from("p1"),
            "lars".into(),
            "important passage".into(),
            Anchor {
                char_range: Some((10, 25)),
                quote: Some("neutron energy".into()),
                ..Anchor::default()
            },
        )
    }

    /// Offline-safe invariant (#51). Every annotation write path
    /// (`create`, replies, `update_note`, `soft_delete`) must be purely
    /// local — no network, no auth probe, no reqwest. The 2-pane
    /// workflow makes this trust-critical: a user on a plane still
    /// captures their reading notes; the TUI's offline badge only
    /// gates network-requiring operations (search / download), not
    /// annotations.
    ///
    /// This test locks that invariant in: the entire annotation
    /// lifecycle round-trips through a fresh in-memory SQLite DB with
    /// no `reqwest::Client`, no environment, no adapters instantiated.
    /// If a future refactor introduces a network dep on this path,
    /// the construction of that dep will either force this test to
    /// change or will be catchable by review.
    #[test]
    fn annotation_writes_are_offline_safe() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);

        // Create root → reply → update root note → soft-delete reply.
        // If any of these silently required network access, the call
        // chain wouldn't compile (no reqwest in this crate's deps).
        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "seconded".into());
        repo.create(&reply).unwrap();
        repo.update_note(root.id.as_str(), "edited offline", None, &[])
            .unwrap();
        repo.soft_delete(reply.id.as_str()).unwrap();

        // Survivors visible on next read.
        let all = repo.list_by_paper("p1").unwrap();
        assert_eq!(all.len(), 1, "root survives; reply tombstoned out");
        assert_eq!(all[0].note, "edited offline");
    }

    #[test]
    fn create_and_get_roundtrip() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let root = sample_root();
        repo.create(&root).unwrap();

        let loaded = repo.get(root.id.as_str()).unwrap().expect("present");
        assert_eq!(loaded.note, "important passage");
        assert_eq!(loaded.anchor.char_range, Some((10, 25)));
        assert_eq!(loaded.anchor.quote.as_deref(), Some("neutron energy"));
    }

    #[test]
    fn replies_threaded_under_root() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "see fig 4".into());
        repo.create(&reply).unwrap();

        let replies = repo.list_replies(root.id.as_str()).unwrap();
        assert_eq!(replies.len(), 1);
        assert_eq!(replies[0].note, "see fig 4");
    }

    #[test]
    fn soft_delete_hides_from_listings_but_thread_preserved() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "yep".into());
        repo.create(&reply).unwrap();

        repo.soft_delete(root.id.as_str()).unwrap();

        // Root is hidden from get() and list_by_paper()
        assert!(repo.get(root.id.as_str()).unwrap().is_none());
        assert!(
            repo.list_by_paper("p1")
                .unwrap()
                .iter()
                .all(|a| a.id != root.id)
        );
        // Reply still points at the (soft-deleted) root, so the thread is
        // recoverable if we ever want to undelete.
        let replies = repo.list_replies(root.id.as_str()).unwrap();
        assert_eq!(replies.len(), 1);
    }

    #[test]
    fn update_note_persists() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let root = sample_root();
        repo.create(&root).unwrap();

        repo.update_note(
            root.id.as_str(),
            "new note",
            Some("blue"),
            &["tag1".into(), "tag2".into()],
        )
        .unwrap();

        let loaded = repo.get(root.id.as_str()).unwrap().unwrap();
        assert_eq!(loaded.note, "new note");
        assert_eq!(loaded.color.as_deref(), Some("blue"));
        assert_eq!(loaded.tags, vec!["tag1".to_string(), "tag2".to_string()]);
    }

    // ---- Resolver tests ----

    #[test]
    fn resolver_ok_when_text_unchanged() {
        // "abcde" at offsets (1,4) is "bcd".
        let mut a = Anchor {
            char_range: Some((1, 4)),
            quote: Some("bcd".into()),
            ..Anchor::default()
        };
        assert_eq!(resolve_anchor(&mut a, "abcde"), AnchorStatus::Ok);
    }

    #[test]
    fn resolver_drifted_when_quote_moved() {
        // Same quote, shifted 2 chars to the right.
        let mut a = Anchor {
            char_range: Some((1, 4)),
            quote: Some("bcd".into()),
            ..Anchor::default()
        };
        assert_eq!(resolve_anchor(&mut a, "xxabcde"), AnchorStatus::Drifted);
        assert_eq!(a.char_range, Some((3, 6)));
        assert_eq!(a.status, AnchorStatus::Drifted);
    }

    #[test]
    fn resolver_orphan_when_quote_missing() {
        let mut a = Anchor {
            char_range: Some((1, 4)),
            quote: Some("bcd".into()),
            ..Anchor::default()
        };
        assert_eq!(
            resolve_anchor(&mut a, "nothing to see"),
            AnchorStatus::Orphan
        );
    }

    // ---- Read-receipt tests ----

    #[test]
    fn unread_includes_rows_never_seen() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let a = sample_root();
        repo.create(&a).unwrap();
        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert_eq!(unread.len(), 1);
    }

    #[test]
    fn unread_excludes_rows_seen_after_update() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let a = sample_root();
        repo.create(&a).unwrap();
        repo.mark_seen(&[a.id.as_str()], "lars").unwrap();
        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert!(unread.is_empty(), "should be no unread after mark_seen");
    }

    #[test]
    fn unread_reappears_after_annotation_is_updated() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let a = sample_root();
        repo.create(&a).unwrap();
        repo.mark_seen(&[a.id.as_str()], "lars").unwrap();
        // Make sure `updated_at` lands strictly after `seen_at`.
        // NOTE(review): this was sized for a 1-second timestamp
        // resolution; `to_rfc3339` appears to keep sub-second precision,
        // so a much shorter pause may be enough — confirm before
        // tightening.
        std::thread::sleep(std::time::Duration::from_millis(1100));
        repo.update_note(a.id.as_str(), "edited note", None, &[])
            .unwrap();
        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert_eq!(unread.len(), 1, "edit should resurface the row as unread");
    }

    #[test]
    fn mark_thread_seen_covers_root_and_replies() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "follow-up".into());
        repo.create(&reply).unwrap();

        repo.mark_thread_seen(root.id.as_str(), "lars").unwrap();
        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert!(unread.is_empty());
    }

    #[test]
    fn independent_readers_track_state_independently() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let a = sample_root();
        repo.create(&a).unwrap();
        repo.mark_seen(&[a.id.as_str()], "lars").unwrap();
        assert!(repo.list_unread("lars", Some("p1")).unwrap().is_empty());
        assert_eq!(repo.list_unread("claude", Some("p1")).unwrap().len(), 1);
    }

    #[test]
    fn resolver_handles_multibyte_chars() {
        // U+2019 (curly apostrophe) is 3 bytes / 1 char.
        let text = "D\u{2019}Ippolito wrote that...";
        let quote = "D\u{2019}Ippolito";
        let mut a = Anchor {
            char_range: Some((0, quote.chars().count())),
            quote: Some(quote.into()),
            ..Anchor::default()
        };
        assert_eq!(resolve_anchor(&mut a, text), AnchorStatus::Ok);
    }

    // ---- #96 multi-selector resolver tests ----

    #[test]
    fn resolver_uses_prefix_to_disambiguate_collision() {
        // "the model" appears twice; prefix "Then " and suffix
        // " was trained" both point at the second occurrence.
        let text = "Initially the model failed. Then the model was trained on more data.";
        let mut a = Anchor {
            char_range: None,
            quote: Some("the model".into()),
            prefix: Some("Then ".into()),
            suffix: Some(" was trained".into()),
            ..Anchor::default()
        };
        assert_eq!(resolve_anchor(&mut a, text), AnchorStatus::Drifted);
        let (s, e) = a.char_range.unwrap();
        // `char_range` holds char offsets, so slice by chars, not bytes.
        let resolved: String = text.chars().skip(s).take(e - s).collect();
        assert_eq!(resolved, "the model");
        // Specifically the *second* occurrence.
        assert!(s > 20, "expected the second occurrence at s>20, got s={s}");
    }

    #[test]
    fn resolver_falls_back_to_fuzzy_on_minor_edit() {
        // Quote was "the network was deep"; publisher edited to "the
        // network was very deep" — substring fails, fuzzy still hits.
        let text = "We argued the network was very deep enough to overfit.";
        let mut a = Anchor {
            char_range: None,
            quote: Some("the network was deep".into()),
            ..Anchor::default()
        };
        // Use a permissive threshold so the test isn't sensitive to
        // strsim version drift.
        let s = resolve_anchor_with_threshold(&mut a, text, 0.85);
        assert_eq!(
            s,
            AnchorStatus::Drifted,
            "fuzzy match should drift, got {s:?}"
        );
    }

    #[test]
    fn resolver_returns_orphan_when_offsets_oob_and_quote_absent() {
        // char_range out of bounds, quote not present in text — must
        // return Orphan instead of panicking. (#96 gap 4)
        let mut a = Anchor {
            char_range: Some((9000, 9100)),
            quote: Some("vanished".into()),
            ..Anchor::default()
        };
        assert_eq!(
            resolve_anchor(&mut a, "the small text"),
            AnchorStatus::Orphan
        );
    }

    #[test]
    fn resolver_uses_sentence_id_when_quote_unfindable() {
        use scitadel_core::models::sentence_id;
        // Sentence content preserved (same words, different
        // case/whitespace). Quote string is wholly absent from the
        // new text so substring + fuzzy fail; sentence-id rescues.
        let original_sentence = "The Transformer Architecture relies on self-attention.";
        let new_text = "Intro. the   transformer architecture relies on self-attention. Outro.";
        let mut a = Anchor {
            char_range: None,
            // Bypasses substring + fuzzy.
            quote: Some("ZZZ-not-in-new-text-ZZZ".into()),
            sentence_id: Some(sentence_id(original_sentence)),
            ..Anchor::default()
        };
        let s = resolve_anchor(&mut a, new_text);
        assert_eq!(
            s,
            AnchorStatus::Drifted,
            "sentence-id rescue should mark Drifted, got {s:?}"
        );
        let (start, end) = a.char_range.unwrap();
        let resolved: String = new_text.chars().skip(start).take(end - start).collect();
        assert!(
            resolved.contains("transformer architecture"),
            "expected re-anchor to the matching sentence; got {resolved:?}"
        );
    }
}