Skip to main content

scitadel_db/sqlite/
annotations.rs

1//! SQLite-backed repository for annotations (#49 iter 2, #96 resolver).
2//!
3//! Covers CRUD, threaded reply loading, and the four-step W3C-style
4//! anchor resolver:
5//!
6//! 1. position (`char_range` + bounds-check)
7//! 2. quote with prefix/suffix context disambiguation
8//! 3. fuzzy quote match (Jaro-Winkler over a sliding window)
9//! 4. sentence-id (SHA1 of normalized sentence; see ADR-004)
10//!
11//! Failure of all four selectors yields `AnchorStatus::Orphan`.
12
13use chrono::{DateTime, Utc};
14use rusqlite::{OptionalExtension, params};
15use scitadel_core::models::{Anchor, AnchorStatus, Annotation, AnnotationId, PaperId, QuestionId};
16
17use crate::error::DbError;
18use crate::sqlite::Database;
19
/// Repository for the `annotations` table and the `annotation_reads`
/// read-receipt table, backed by a [`Database`] handle.
///
/// Every method obtains its connection through `Database::conn`.
#[derive(Clone)]
pub struct SqliteAnnotationRepository {
    /// Database handle used to obtain connections.
    db: Database,
}
24
25impl SqliteAnnotationRepository {
    /// Build a repository on top of an existing [`Database`] handle.
    pub fn new(db: Database) -> Self {
        Self { db }
    }
29
30    /// Insert a new annotation. Caller is responsible for building the
31    /// `Annotation` (see `Annotation::new_root` / `new_reply`).
32    pub fn create(&self, annotation: &Annotation) -> Result<(), DbError> {
33        let conn = self.db.conn()?;
34        Self::insert_via(&conn, annotation)
35    }
36
37    /// Transactional sibling of [`Self::create`] (#157). Used by the
38    /// bib-import orchestrator so paper-save + alias-record + annotation-
39    /// create commit (or roll back) as a single unit per row.
40    pub fn create_in_tx(
41        tx: &rusqlite::Transaction<'_>,
42        annotation: &Annotation,
43    ) -> Result<(), DbError> {
44        Self::insert_via(tx, annotation)
45    }
46
47    fn insert_via(conn: &rusqlite::Connection, annotation: &Annotation) -> Result<(), DbError> {
48        conn.execute(
49            "INSERT INTO annotations
50                (id, parent_id, paper_id, question_id,
51                 char_start, char_end, quote, prefix, suffix,
52                 sentence_id, source_version, anchor_status,
53                 note, color, tags_json, author,
54                 created_at, updated_at, deleted_at)
55             VALUES (?1, ?2, ?3, ?4,
56                     ?5, ?6, ?7, ?8, ?9,
57                     ?10, ?11, ?12,
58                     ?13, ?14, ?15, ?16,
59                     ?17, ?18, ?19)",
60            params![
61                annotation.id.as_str(),
62                annotation.parent_id.as_ref().map(AnnotationId::as_str),
63                annotation.paper_id.as_str(),
64                annotation.question_id.as_ref().map(QuestionId::as_str),
65                annotation.anchor.char_range.map(|(s, _)| s as i64),
66                annotation.anchor.char_range.map(|(_, e)| e as i64),
67                annotation.anchor.quote,
68                annotation.anchor.prefix,
69                annotation.anchor.suffix,
70                annotation.anchor.sentence_id,
71                annotation.anchor.source_version,
72                annotation.anchor.status.as_str(),
73                annotation.note,
74                annotation.color,
75                serde_json::to_string(&annotation.tags).unwrap_or_else(|_| "[]".into()),
76                annotation.author,
77                annotation.created_at.to_rfc3339(),
78                annotation.updated_at.to_rfc3339(),
79                annotation.deleted_at.map(|d| d.to_rfc3339()),
80            ],
81        )?;
82        Ok(())
83    }
84
85    /// Fetch an annotation by ID (live rows only).
86    pub fn get(&self, id: &str) -> Result<Option<Annotation>, DbError> {
87        let conn = self.db.conn()?;
88        let mut stmt =
89            conn.prepare("SELECT * FROM annotations WHERE id = ?1 AND deleted_at IS NULL")?;
90        let out = stmt.query_row(params![id], row_to_annotation).optional()?;
91        Ok(out)
92    }
93
94    /// All live annotations anchored to a paper (roots + replies).
95    pub fn list_by_paper(&self, paper_id: &str) -> Result<Vec<Annotation>, DbError> {
96        let conn = self.db.conn()?;
97        let mut stmt = conn.prepare(
98            "SELECT * FROM annotations
99             WHERE paper_id = ?1 AND deleted_at IS NULL
100             ORDER BY created_at ASC",
101        )?;
102        let rows = stmt.query_map(params![paper_id], row_to_annotation)?;
103        Ok(rows.filter_map(Result::ok).collect())
104    }
105
106    /// All live replies to a specific root annotation, ordered oldest-first.
107    pub fn list_replies(&self, parent_id: &str) -> Result<Vec<Annotation>, DbError> {
108        let conn = self.db.conn()?;
109        let mut stmt = conn.prepare(
110            "SELECT * FROM annotations
111             WHERE parent_id = ?1 AND deleted_at IS NULL
112             ORDER BY created_at ASC",
113        )?;
114        let rows = stmt.query_map(params![parent_id], row_to_annotation)?;
115        Ok(rows.filter_map(Result::ok).collect())
116    }
117
118    /// Update mutable fields (note / color / tags). Anchor is updated
119    /// separately via `update_anchor` since it has its own lifecycle.
120    pub fn update_note(
121        &self,
122        id: &str,
123        note: &str,
124        color: Option<&str>,
125        tags: &[String],
126    ) -> Result<(), DbError> {
127        let conn = self.db.conn()?;
128        conn.execute(
129            "UPDATE annotations
130             SET note = ?1, color = ?2, tags_json = ?3, updated_at = ?4
131             WHERE id = ?5",
132            params![
133                note,
134                color,
135                serde_json::to_string(tags).unwrap_or_else(|_| "[]".into()),
136                Utc::now().to_rfc3339(),
137                id,
138            ],
139        )?;
140        Ok(())
141    }
142
143    /// Persist the resolver's updated anchor state. Called after
144    /// `resolve_anchor` runs on paper-open.
145    pub fn update_anchor(&self, id: &str, anchor: &Anchor) -> Result<(), DbError> {
146        let conn = self.db.conn()?;
147        conn.execute(
148            "UPDATE annotations
149             SET char_start = ?1, char_end = ?2,
150                 anchor_status = ?3, updated_at = ?4
151             WHERE id = ?5",
152            params![
153                anchor.char_range.map(|(s, _)| s as i64),
154                anchor.char_range.map(|(_, e)| e as i64),
155                anchor.status.as_str(),
156                Utc::now().to_rfc3339(),
157                id,
158            ],
159        )?;
160        Ok(())
161    }
162
163    /// Soft-delete — tombstones the row so replies still point at
164    /// something, and `list_*` queries skip it.
165    pub fn soft_delete(&self, id: &str) -> Result<(), DbError> {
166        let conn = self.db.conn()?;
167        conn.execute(
168            "UPDATE annotations SET deleted_at = ?1 WHERE id = ?2",
169            params![Utc::now().to_rfc3339(), id],
170        )?;
171        Ok(())
172    }
173
174    /// Record that `reader` has seen the current state of each annotation.
175    /// Upserts so repeat calls bump `seen_at`.
176    pub fn mark_seen(&self, annotation_ids: &[&str], reader: &str) -> Result<(), DbError> {
177        if annotation_ids.is_empty() {
178            return Ok(());
179        }
180        let mut conn = self.db.conn()?;
181        let tx = conn.transaction()?;
182        let now = Utc::now().to_rfc3339();
183        for id in annotation_ids {
184            tx.execute(
185                "INSERT INTO annotation_reads (annotation_id, reader, seen_at)
186                 VALUES (?1, ?2, ?3)
187                 ON CONFLICT(annotation_id, reader) DO UPDATE SET seen_at = excluded.seen_at",
188                params![id, reader, now],
189            )?;
190        }
191        tx.commit()?;
192        Ok(())
193    }
194
195    /// Mark the thread rooted at `root_id` (root + all live replies) as
196    /// seen by `reader`.
197    pub fn mark_thread_seen(&self, root_id: &str, reader: &str) -> Result<(), DbError> {
198        let replies = self.list_replies(root_id)?;
199        let mut ids: Vec<&str> = replies.iter().map(|a| a.id.as_str()).collect();
200        ids.push(root_id);
201        self.mark_seen(&ids, reader)
202    }
203
204    /// Annotations the `reader` hasn't seen since the last modification.
205    /// Optional `paper_id` scopes the query. Uses a LEFT JOIN so rows
206    /// with no receipt count as unread; rows whose `seen_at` is older
207    /// than `updated_at` also count (the annotation changed since last
208    /// view).
209    pub fn list_unread(
210        &self,
211        reader: &str,
212        paper_id: Option<&str>,
213    ) -> Result<Vec<Annotation>, DbError> {
214        let conn = self.db.conn()?;
215        let (sql, rows) = if let Some(pid) = paper_id {
216            let mut stmt = conn.prepare(
217                "SELECT a.* FROM annotations a
218                 LEFT JOIN annotation_reads r
219                   ON r.annotation_id = a.id AND r.reader = ?1
220                 WHERE a.paper_id = ?2
221                   AND a.deleted_at IS NULL
222                   AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)
223                 ORDER BY a.created_at ASC",
224            )?;
225            let rows = stmt
226                .query_map(params![reader, pid], row_to_annotation)?
227                .filter_map(Result::ok)
228                .collect::<Vec<_>>();
229            ("scoped", rows)
230        } else {
231            let mut stmt = conn.prepare(
232                "SELECT a.* FROM annotations a
233                 LEFT JOIN annotation_reads r
234                   ON r.annotation_id = a.id AND r.reader = ?1
235                 WHERE a.deleted_at IS NULL
236                   AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)
237                 ORDER BY a.created_at ASC",
238            )?;
239            let rows = stmt
240                .query_map(params![reader], row_to_annotation)?
241                .filter_map(Result::ok)
242                .collect::<Vec<_>>();
243            ("all", rows)
244        };
245        let _ = sql; // kept for potential future logging
246        Ok(rows)
247    }
248
249    /// Set of paper IDs that have at least one annotation `reader`
250    /// hasn't acknowledged. Drives the per-row `●` glyph on the
251    /// Papers list — one indexed query per draw tick. (#185)
252    pub fn papers_with_unread(
253        &self,
254        reader: &str,
255    ) -> Result<std::collections::HashSet<String>, DbError> {
256        let conn = self.db.conn()?;
257        let mut stmt = conn.prepare(
258            "SELECT DISTINCT a.paper_id FROM annotations a
259             LEFT JOIN annotation_reads r
260               ON r.annotation_id = a.id AND r.reader = ?1
261             WHERE a.deleted_at IS NULL
262               AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)",
263        )?;
264        let rows = stmt
265            .query_map(params![reader], |row| row.get::<_, String>(0))?
266            .filter_map(Result::ok);
267        Ok(rows.collect())
268    }
269
270    /// Same predicate as `list_unread` but returns just the count.
271    /// Called on every TUI draw tick (~10 Hz) to drive the status-bar
272    /// `[N new]` badge — `COUNT(*)` keeps the per-tick cost in
273    /// microseconds even when there are many annotations. (#185)
274    pub fn count_unread(&self, reader: &str, paper_id: Option<&str>) -> Result<i64, DbError> {
275        let conn = self.db.conn()?;
276        let n: i64 = if let Some(pid) = paper_id {
277            conn.query_row(
278                "SELECT COUNT(*) FROM annotations a
279                 LEFT JOIN annotation_reads r
280                   ON r.annotation_id = a.id AND r.reader = ?1
281                 WHERE a.paper_id = ?2
282                   AND a.deleted_at IS NULL
283                   AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)",
284                params![reader, pid],
285                |row| row.get(0),
286            )?
287        } else {
288            conn.query_row(
289                "SELECT COUNT(*) FROM annotations a
290                 LEFT JOIN annotation_reads r
291                   ON r.annotation_id = a.id AND r.reader = ?1
292                 WHERE a.deleted_at IS NULL
293                   AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)",
294                params![reader],
295                |row| row.get(0),
296            )?
297        };
298        Ok(n)
299    }
300}
301
302fn row_to_annotation(row: &rusqlite::Row) -> rusqlite::Result<Annotation> {
303    let char_start: Option<i64> = row.get("char_start")?;
304    let char_end: Option<i64> = row.get("char_end")?;
305    let char_range = match (char_start, char_end) {
306        (Some(s), Some(e)) => Some((s as usize, e as usize)),
307        _ => None,
308    };
309    let anchor_status_str: Option<String> = row.get("anchor_status")?;
310    let anchor = Anchor {
311        char_range,
312        quote: row.get("quote")?,
313        prefix: row.get("prefix")?,
314        suffix: row.get("suffix")?,
315        sentence_id: row.get("sentence_id")?,
316        source_version: row.get("source_version")?,
317        status: anchor_status_str
318            .as_deref()
319            .and_then(AnchorStatus::parse)
320            .unwrap_or_default(),
321    };
322
323    let tags_json: String = row.get("tags_json")?;
324    let tags: Vec<String> = serde_json::from_str(&tags_json).unwrap_or_default();
325
326    let parent_id: Option<String> = row.get("parent_id")?;
327    let question_id: Option<String> = row.get("question_id")?;
328    let created_at: String = row.get("created_at")?;
329    let updated_at: String = row.get("updated_at")?;
330    let deleted_at: Option<String> = row.get("deleted_at")?;
331
332    Ok(Annotation {
333        id: AnnotationId::from(row.get::<_, String>("id")?),
334        parent_id: parent_id.map(AnnotationId::from),
335        paper_id: PaperId::from(row.get::<_, String>("paper_id")?),
336        question_id: question_id.map(QuestionId::from),
337        anchor,
338        note: row.get("note")?,
339        color: row.get("color")?,
340        tags,
341        author: row.get("author")?,
342        created_at: parse_dt(&created_at),
343        updated_at: parse_dt(&updated_at),
344        deleted_at: deleted_at.as_deref().map(parse_dt),
345    })
346}
347
348fn parse_dt(s: &str) -> DateTime<Utc> {
349    DateTime::parse_from_rfc3339(s).map_or_else(|_| Utc::now(), |dt| dt.with_timezone(&Utc))
350}
351
/// Default fuzzy-match threshold (Jaro-Winkler similarity in [0,1])
/// that `resolve_anchor` passes to `resolve_anchor_with_threshold`.
/// A text window scoring at or above this value is accepted by the
/// fuzzy step and the anchor is marked `Drifted`. Call
/// `resolve_anchor_with_threshold` directly to use a different value.
pub const FUZZY_THRESHOLD: f64 = 0.9;
356
/// Resolve an anchor against current paper text, updating `status` and
/// (if the quote shifted) `char_range` in place. Uses the default
/// `FUZZY_THRESHOLD`; see `resolve_anchor_with_threshold` to tune it.
///
/// Synthetic anchors (`.bib`-imported notes and paper-level notes) are
/// short-circuited to `Ok` before any selector runs. Everything else
/// goes through the four-step W3C-style pipeline (#96):
///
/// 1. **Position**: `char_range` still hits the same `quote` → `Ok`.
///    Bounds-checked; out-of-range offsets fall through, never panic.
/// 2. **Quote + prefix/suffix context**: every occurrence of `quote`
///    in `text` is scored by how well its surroundings match the
///    stored `prefix` / `suffix`; the best-scoring occurrence wins.
///    With a single occurrence, or with no stored context at all, the
///    first occurrence is taken without scoring → `Drifted`.
/// 3. **Fuzzy quote match**: sliding window the size of `quote` over
///    `text`; Jaro-Winkler ≥ `FUZZY_THRESHOLD` → `Drifted`. Catches
///    one-word publisher edits that would otherwise orphan.
/// 4. **Sentence-id**: split text into sentences, hash each via
///    `sentence_id()`, and re-anchor on the whole matching sentence
///    → `Drifted`. Survives quote rewrites that preserve the
///    surrounding sentence.
///
/// Returns `Orphan` only when all four selectors fail.
pub fn resolve_anchor(anchor: &mut Anchor, text: &str) -> AnchorStatus {
    resolve_anchor_with_threshold(anchor, text, FUZZY_THRESHOLD)
}
379
380pub fn resolve_anchor_with_threshold(
381    anchor: &mut Anchor,
382    text: &str,
383    fuzzy_threshold: f64,
384) -> AnchorStatus {
385    // Short-circuit: `.bib`-imported `note=` annotations carry only
386    // a synthetic marker `sentence_id` (no quote / range), since they
387    // have nothing to anchor against in the paper text yet. Treat them
388    // as `Ok` instead of falling through to `Orphan`, so they don't
389    // trip the orphan-warning UI flow (#158). Real Orphan semantics
390    // still apply to anchors whose selectors *should* match paper text
391    // but failed to.
392    if anchor.is_imported_synthetic() {
393        anchor.status = AnchorStatus::Ok;
394        return AnchorStatus::Ok;
395    }
396
397    // Short-circuit: paper-level notes — commentary on the
398    // publication as a whole — carry a synthetic `paper-note:<id>`
399    // sentence_id with no quote / range. They have nothing to
400    // re-anchor against in the body text by design, so the only
401    // sensible status is Ok. The TUI renders them in a separate
402    // section above the thread list. (#185)
403    if anchor.is_paper_note() {
404        anchor.status = AnchorStatus::Ok;
405        return AnchorStatus::Ok;
406    }
407
408    // Step 1: position selector — bounds-checked.
409    if let (Some((start, end)), Some(quote)) = (anchor.char_range, anchor.quote.as_ref())
410        && let Some(slice) = char_slice(text, start, end)
411        && &slice == quote
412    {
413        anchor.status = AnchorStatus::Ok;
414        return AnchorStatus::Ok;
415    }
416
417    // Step 2: quote with prefix/suffix disambiguation.
418    if let Some(quote) = anchor.quote.as_ref()
419        && let Some((sc, ec)) = find_with_context(
420            text,
421            quote,
422            anchor.prefix.as_deref(),
423            anchor.suffix.as_deref(),
424        )
425    {
426        anchor.char_range = Some((sc, ec));
427        anchor.status = AnchorStatus::Drifted;
428        return AnchorStatus::Drifted;
429    }
430
431    // Step 3: fuzzy quote match (sliding window).
432    if let Some(quote) = anchor.quote.as_ref()
433        && let Some((sc, ec)) = fuzzy_find(text, quote, fuzzy_threshold)
434    {
435        anchor.char_range = Some((sc, ec));
436        anchor.status = AnchorStatus::Drifted;
437        return AnchorStatus::Drifted;
438    }
439
440    // Step 4: sentence-id fallback.
441    if let Some(sid) = anchor.sentence_id.as_ref()
442        && let Some((sc, ec)) = find_sentence_by_id(text, sid)
443    {
444        anchor.char_range = Some((sc, ec));
445        anchor.status = AnchorStatus::Drifted;
446        return AnchorStatus::Drifted;
447    }
448
449    anchor.status = AnchorStatus::Orphan;
450    AnchorStatus::Orphan
451}
452
/// Copy out the chars of `text` in the half-open char range
/// `start..end`.
///
/// Yields `None` when the range is inverted (`end < start`) or when
/// `text` has too few chars to fill it, so callers never hit the
/// out-of-bounds panic the old resolver had (#96 gap 4). Offsets are
/// counted in `char`s, not bytes. A zero-width range always succeeds
/// with an empty string.
fn char_slice(text: &str, start: usize, end: usize) -> Option<String> {
    let wanted = end.checked_sub(start)?;
    let mut picked = String::new();
    let mut taken = 0usize;
    for ch in text.chars().skip(start).take(wanted) {
        picked.push(ch);
        taken += 1;
    }
    // Fewer chars than requested means the range ran past the text.
    (taken == wanted).then_some(picked)
}
468
/// Find every `(start_char, end_char)` where `quote` occurs in `text`.
///
/// Offsets are counted in `char`s, so multibyte text maps cleanly.
/// Matches are non-overlapping and reported left to right; an empty
/// `quote` yields no matches.
///
/// The char offset is advanced incrementally from one hit to the next,
/// so the text is scanned once however many hits there are.
fn find_all(text: &str, quote: &str) -> Vec<(usize, usize)> {
    if quote.is_empty() {
        return Vec::new();
    }
    let quote_chars = quote.chars().count();
    let mut hits = Vec::new();
    // Byte offset of the previous hit (or 0) and the char offset that
    // corresponds to it.
    let mut byte_pos = 0;
    let mut char_pos = 0;
    for (hit, _) in text.match_indices(quote) {
        char_pos += text[byte_pos..hit].chars().count();
        hits.push((char_pos, char_pos + quote_chars));
        byte_pos = hit;
    }
    hits
}
486
487/// Pick the occurrence whose surrounding context best matches the
488/// stored `prefix` / `suffix`. With a single hit and no context, it's
489/// a plain substring lookup; with multiple hits, the prefix-suffix
490/// score breaks the tie.
491fn find_with_context(
492    text: &str,
493    quote: &str,
494    prefix: Option<&str>,
495    suffix: Option<&str>,
496) -> Option<(usize, usize)> {
497    let occurrences = find_all(text, quote);
498    if occurrences.is_empty() {
499        return None;
500    }
501    if occurrences.len() == 1 || (prefix.is_none() && suffix.is_none()) {
502        return Some(occurrences[0]);
503    }
504
505    let chars: Vec<char> = text.chars().collect();
506    occurrences
507        .into_iter()
508        .max_by_key(|&(sc, ec)| context_score(&chars, sc, ec, prefix, suffix))
509}
510
/// Score a candidate's surroundings against the stored prefix/suffix.
///
/// `chars` is the full text as chars; `start..end` is the candidate's
/// char range within it. The score is the number of matching chars
/// counted from the inside out: backwards from the candidate's start
/// against the end of `prefix`, plus forwards from the candidate's end
/// against the start of `suffix`. Each direction stops at its first
/// mismatch, because the chars adjacent to the match are the most
/// load-bearing.
///
/// An absent prefix/suffix contributes 0. A `start` or `end` beyond
/// `chars` is treated as having no context on that side (contributes
/// 0) instead of panicking.
fn context_score(
    chars: &[char],
    start: usize,
    end: usize,
    prefix: Option<&str>,
    suffix: Option<&str>,
) -> i64 {
    // Checked slicing: an out-of-range offset yields an empty side.
    let before = chars.get(..start).unwrap_or_default();
    let after = chars.get(end..).unwrap_or_default();

    let prefix_hits = prefix.map_or(0, |p| {
        before
            .iter()
            .rev()
            .zip(p.chars().rev())
            .take_while(|(have, want)| **have == *want)
            .count()
    });
    let suffix_hits = suffix.map_or(0, |s| {
        after
            .iter()
            .zip(s.chars())
            .take_while(|(have, want)| **have == *want)
            .count()
    });

    i64::try_from(prefix_hits + suffix_hits).unwrap_or(i64::MAX)
}
547
548/// Sliding-window fuzzy match. Walks character-aligned windows the
549/// size of `quote` and returns the highest-scoring window that meets
550/// `threshold` (Jaro-Winkler in [0,1]).
551fn fuzzy_find(text: &str, quote: &str, threshold: f64) -> Option<(usize, usize)> {
552    if quote.is_empty() {
553        return None;
554    }
555    let chars: Vec<char> = text.chars().collect();
556    let qlen = quote.chars().count();
557    if chars.len() < qlen {
558        return None;
559    }
560
561    let mut best: Option<(usize, f64)> = None;
562    for start in 0..=chars.len() - qlen {
563        let window: String = chars[start..start + qlen].iter().collect();
564        let score = strsim::jaro_winkler(&window, quote);
565        if score >= threshold && best.is_none_or(|(_, b)| score > b) {
566            best = Some((start, score));
567        }
568    }
569    best.map(|(start, _)| (start, start + qlen))
570}
571
572/// Find the sentence in `text` whose `sentence_id` matches `sid`.
573/// Sentence boundaries are simple terminator-based (`. ! ?`) — good
574/// enough for paper bodies and abstracts; ADR-004 calls out that
575/// proper ICU sentence segmentation is a follow-up.
576fn find_sentence_by_id(text: &str, sid: &str) -> Option<(usize, usize)> {
577    let chars: Vec<char> = text.chars().collect();
578    let mut sentence_start_char = 0;
579    let mut i = 0;
580    while i < chars.len() {
581        let ch = chars[i];
582        let is_terminator = matches!(ch, '.' | '!' | '?');
583        let is_end = i + 1 == chars.len();
584        if is_terminator || is_end {
585            let end = if is_end { chars.len() } else { i + 1 };
586            let sentence: String = chars[sentence_start_char..end].iter().collect();
587            let trimmed = sentence.trim();
588            if !trimmed.is_empty() && scitadel_core::models::sentence_id(trimmed) == sid {
589                // Map back to the trimmed sentence's char range inside `text`.
590                let leading_ws = sentence.chars().take_while(|c| c.is_whitespace()).count();
591                let trailing_ws = sentence
592                    .chars()
593                    .rev()
594                    .take_while(|c| c.is_whitespace())
595                    .count();
596                let trimmed_start = sentence_start_char + leading_ws;
597                let trimmed_end = end - trailing_ws;
598                if trimmed_end > trimmed_start {
599                    return Some((trimmed_start, trimmed_end));
600                }
601            }
602            // Advance past the terminator into the next sentence.
603            sentence_start_char = end;
604        }
605        i += 1;
606    }
607    None
608}
609
610#[cfg(test)]
611mod tests {
612    use super::*;
613    use scitadel_core::models::Annotation;
614
615    fn fresh_db_with_paper() -> Database {
616        let db = Database::open_in_memory().unwrap();
617        db.migrate().unwrap();
618        let conn = db.conn().unwrap();
619        conn.execute(
620            "INSERT INTO papers (id, title, created_at, updated_at)
621             VALUES ('p1', 't', datetime('now'), datetime('now'))",
622            [],
623        )
624        .unwrap();
625        db
626    }
627
628    fn sample_root() -> Annotation {
629        Annotation::new_root(
630            PaperId::from("p1"),
631            "lars".into(),
632            "important passage".into(),
633            Anchor {
634                char_range: Some((10, 25)),
635                quote: Some("neutron energy".into()),
636                ..Anchor::default()
637            },
638        )
639    }
640
641    /// Offline-safe invariant (#51). Every annotation write path
642    /// (`create`, replies, `update_note`, `soft_delete`) must be purely
643    /// local — no network, no auth probe, no reqwest. The 2-pane
644    /// workflow makes this trust-critical: a user on a plane still
645    /// captures their reading notes; the TUI's offline badge only
646    /// gates network-requiring operations (search / download), not
647    /// annotations.
648    ///
649    /// This test locks that invariant in: the entire annotation
650    /// lifecycle round-trips through a fresh in-memory SQLite DB with
651    /// no `reqwest::Client`, no environment, no adapters instantiated.
652    /// If a future refactor introduces a network dep on this path,
653    /// the construction of that dep will either force this test to
654    /// change or will be catchable by review.
655    #[test]
656    fn annotation_writes_are_offline_safe() {
657        let db = fresh_db_with_paper();
658        let repo = SqliteAnnotationRepository::new(db);
659
660        // Create root → reply → update root note → soft-delete reply.
661        // If any of these silently required network access, the call
662        // chain wouldn't compile (no reqwest in this crate's deps).
663        let root = sample_root();
664        repo.create(&root).unwrap();
665        let reply = Annotation::new_reply(&root, "claude".into(), "seconded".into());
666        repo.create(&reply).unwrap();
667        repo.update_note(root.id.as_str(), "edited offline", None, &[])
668            .unwrap();
669        repo.soft_delete(reply.id.as_str()).unwrap();
670
671        // Survivors visible on next read.
672        let all = repo.list_by_paper("p1").unwrap();
673        assert_eq!(all.len(), 1, "root survives; reply tombstoned out");
674        assert_eq!(all[0].note, "edited offline");
675    }
676
677    #[test]
678    fn create_and_get_roundtrip() {
679        let db = fresh_db_with_paper();
680        let repo = SqliteAnnotationRepository::new(db);
681        let root = sample_root();
682        repo.create(&root).unwrap();
683
684        let loaded = repo.get(root.id.as_str()).unwrap().expect("present");
685        assert_eq!(loaded.note, "important passage");
686        assert_eq!(loaded.anchor.char_range, Some((10, 25)));
687        assert_eq!(loaded.anchor.quote.as_deref(), Some("neutron energy"));
688    }
689
690    #[test]
691    fn replies_threaded_under_root() {
692        let db = fresh_db_with_paper();
693        let repo = SqliteAnnotationRepository::new(db);
694        let root = sample_root();
695        repo.create(&root).unwrap();
696        let reply = Annotation::new_reply(&root, "claude".into(), "see fig 4".into());
697        repo.create(&reply).unwrap();
698
699        let replies = repo.list_replies(root.id.as_str()).unwrap();
700        assert_eq!(replies.len(), 1);
701        assert_eq!(replies[0].note, "see fig 4");
702    }
703
704    #[test]
705    fn soft_delete_hides_from_listings_but_thread_preserved() {
706        let db = fresh_db_with_paper();
707        let repo = SqliteAnnotationRepository::new(db);
708        let root = sample_root();
709        repo.create(&root).unwrap();
710        let reply = Annotation::new_reply(&root, "claude".into(), "yep".into());
711        repo.create(&reply).unwrap();
712
713        repo.soft_delete(root.id.as_str()).unwrap();
714
715        // Root is hidden from get() and list_by_paper()
716        assert!(repo.get(root.id.as_str()).unwrap().is_none());
717        assert!(
718            repo.list_by_paper("p1")
719                .unwrap()
720                .iter()
721                .all(|a| a.id != root.id)
722        );
723        // Reply still points at the (soft-deleted) root, so the thread is
724        // recoverable if we ever want to undelete.
725        let replies = repo.list_replies(root.id.as_str()).unwrap();
726        assert_eq!(replies.len(), 1);
727    }
728
729    #[test]
730    fn update_note_persists() {
731        let db = fresh_db_with_paper();
732        let repo = SqliteAnnotationRepository::new(db);
733        let root = sample_root();
734        repo.create(&root).unwrap();
735
736        repo.update_note(
737            root.id.as_str(),
738            "new note",
739            Some("blue"),
740            &["tag1".into(), "tag2".into()],
741        )
742        .unwrap();
743
744        let loaded = repo.get(root.id.as_str()).unwrap().unwrap();
745        assert_eq!(loaded.note, "new note");
746        assert_eq!(loaded.color.as_deref(), Some("blue"));
747        assert_eq!(loaded.tags, vec!["tag1".to_string(), "tag2".to_string()]);
748    }
749
750    // ---- Resolver tests ----
751
752    #[test]
753    fn resolver_ok_when_text_unchanged() {
754        // "abcde" at offsets (1,4) is "bcd".
755        let mut a = Anchor {
756            char_range: Some((1, 4)),
757            quote: Some("bcd".into()),
758            ..Anchor::default()
759        };
760        assert_eq!(resolve_anchor(&mut a, "abcde"), AnchorStatus::Ok);
761    }
762
763    #[test]
764    fn resolver_drifted_when_quote_moved() {
765        // Same quote, shifted 2 chars to the right.
766        let mut a = Anchor {
767            char_range: Some((1, 4)),
768            quote: Some("bcd".into()),
769            ..Anchor::default()
770        };
771        assert_eq!(resolve_anchor(&mut a, "xxabcde"), AnchorStatus::Drifted);
772        assert_eq!(a.char_range, Some((3, 6)));
773        assert_eq!(a.status, AnchorStatus::Drifted);
774    }
775
776    #[test]
777    fn resolver_orphan_when_quote_missing() {
778        let mut a = Anchor {
779            char_range: Some((1, 4)),
780            quote: Some("bcd".into()),
781            ..Anchor::default()
782        };
783        assert_eq!(
784            resolve_anchor(&mut a, "nothing to see"),
785            AnchorStatus::Orphan
786        );
787    }
788
789    /// #158: an unanchored `.bib` `note=` import carries only a
790    /// synthetic marker `sentence_id`. The resolver must short-circuit
791    /// to `Ok` rather than falling through to `Orphan` (which would
792    /// trip the orphan-warning UI flow).
793    #[test]
794    fn resolver_short_circuits_imported_synthetic_anchor_to_ok() {
795        let mut a = Anchor {
796            sentence_id: Some(scitadel_core::models::imported_sentence_id(
797                "smith2024",
798                "Reading note about methodology.",
799            )),
800            ..Anchor::default()
801        };
802        // Paper text deliberately contains nothing matching the
803        // synthetic id — the short-circuit must fire regardless.
804        let status = resolve_anchor(&mut a, "the body of the paper says many things.");
805        assert_eq!(status, AnchorStatus::Ok);
806        assert_eq!(a.status, AnchorStatus::Ok);
807        assert!(!a.is_orphan());
808    }
809
810    /// #185: paper-level notes carry a `paper-note:<paper_id>`
811    /// sentence_id with no quote / range. Same short-circuit story
812    /// as the import case but in its own namespace so the two
813    /// kinds can render distinctly in the TUI.
814    #[test]
815    fn resolver_short_circuits_paper_note_anchor_to_ok() {
816        let mut a = Anchor {
817            sentence_id: Some(scitadel_core::models::paper_note_sentence_id("p-attn")),
818            ..Anchor::default()
819        };
820        let status = resolve_anchor(&mut a, "irrelevant body text.");
821        assert_eq!(status, AnchorStatus::Ok);
822        assert_eq!(a.status, AnchorStatus::Ok);
823        assert!(!a.is_orphan());
824    }
825
826    /// #158: an anchor with a `quote` that fails to resolve must still
827    /// flip to `Orphan` — the synthetic short-circuit only applies to
828    /// import-only anchors with no real selectors.
829    #[test]
830    fn resolver_still_orphans_real_anchors_that_fail() {
831        let mut a = Anchor {
832            quote: Some("missing quote".into()),
833            sentence_id: Some(scitadel_core::models::sentence_id("a real sentence.")),
834            ..Anchor::default()
835        };
836        let status = resolve_anchor(&mut a, "different text without the quote.");
837        assert_eq!(status, AnchorStatus::Orphan);
838    }
839
840    // ---- Read-receipt tests ----
841
842    #[test]
843    fn unread_includes_rows_never_seen() {
844        let db = fresh_db_with_paper();
845        let repo = SqliteAnnotationRepository::new(db);
846        let a = sample_root();
847        repo.create(&a).unwrap();
848        let unread = repo.list_unread("lars", Some("p1")).unwrap();
849        assert_eq!(unread.len(), 1);
850    }
851
852    #[test]
853    fn unread_excludes_rows_seen_after_update() {
854        let db = fresh_db_with_paper();
855        let repo = SqliteAnnotationRepository::new(db);
856        let a = sample_root();
857        repo.create(&a).unwrap();
858        repo.mark_seen(&[a.id.as_str()], "lars").unwrap();
859        let unread = repo.list_unread("lars", Some("p1")).unwrap();
860        assert!(unread.is_empty(), "should be no unread after mark_seen");
861    }
862
863    #[test]
864    fn unread_reappears_after_annotation_is_updated() {
865        let db = fresh_db_with_paper();
866        let repo = SqliteAnnotationRepository::new(db);
867        let a = sample_root();
868        repo.create(&a).unwrap();
869        repo.mark_seen(&[a.id.as_str()], "lars").unwrap();
870        // Pause past the 1-second rfc3339 resolution the repo uses.
871        std::thread::sleep(std::time::Duration::from_millis(1100));
872        repo.update_note(a.id.as_str(), "edited note", None, &[])
873            .unwrap();
874        let unread = repo.list_unread("lars", Some("p1")).unwrap();
875        assert_eq!(unread.len(), 1, "edit should resurface the row as unread");
876    }
877
878    #[test]
879    fn mark_thread_seen_covers_root_and_replies() {
880        let db = fresh_db_with_paper();
881        let repo = SqliteAnnotationRepository::new(db);
882        let root = sample_root();
883        repo.create(&root).unwrap();
884        let reply = Annotation::new_reply(&root, "claude".into(), "follow-up".into());
885        repo.create(&reply).unwrap();
886
887        repo.mark_thread_seen(root.id.as_str(), "lars").unwrap();
888        let unread = repo.list_unread("lars", Some("p1")).unwrap();
889        assert!(unread.is_empty());
890    }
891
892    #[test]
893    fn papers_with_unread_returns_distinct_paper_ids() {
894        let db = fresh_db_with_paper();
895        // Add a second paper.
896        db.conn()
897            .unwrap()
898            .execute(
899                "INSERT INTO papers (id, title, authors, abstract, created_at, updated_at)
900                 VALUES ('p2', 'Other', '[]', '', '2026-04-28T00:00:00Z', '2026-04-28T00:00:00Z')",
901                [],
902            )
903            .unwrap();
904        let repo = SqliteAnnotationRepository::new(db);
905        let on_p1 = sample_root();
906        let on_p2 = Annotation::new_root(
907            scitadel_core::models::PaperId::from("p2"),
908            "claude".into(),
909            "n".into(),
910            Anchor::default(),
911        );
912        repo.create(&on_p1).unwrap();
913        let p1_b = Annotation::new_reply(&on_p1, "claude".into(), "follow".into());
914        repo.create(&p1_b).unwrap();
915        repo.create(&on_p2).unwrap();
916
917        let set = repo.papers_with_unread("lars").unwrap();
918        assert_eq!(set.len(), 2, "two distinct paper_ids despite three rows");
919        assert!(set.contains("p1"));
920        assert!(set.contains("p2"));
921
922        repo.mark_thread_seen(on_p1.id.as_str(), "lars").unwrap();
923        let set = repo.papers_with_unread("lars").unwrap();
924        assert_eq!(set, std::iter::once("p2".to_string()).collect());
925    }
926
927    #[test]
928    fn count_unread_matches_list_unread_length() {
929        let db = fresh_db_with_paper();
930        let repo = SqliteAnnotationRepository::new(db);
931        let root = sample_root();
932        repo.create(&root).unwrap();
933        let reply = Annotation::new_reply(&root, "claude".into(), "follow-up".into());
934        repo.create(&reply).unwrap();
935
936        // Both unread for lars.
937        assert_eq!(repo.count_unread("lars", None).unwrap(), 2);
938        assert_eq!(repo.count_unread("lars", Some("p1")).unwrap(), 2);
939
940        repo.mark_thread_seen(root.id.as_str(), "lars").unwrap();
941        assert_eq!(repo.count_unread("lars", None).unwrap(), 0);
942        assert_eq!(repo.count_unread("lars", Some("p1")).unwrap(), 0);
943
944        // Soft-delete must not be counted.
945        let solo = Annotation::new_root(
946            scitadel_core::models::PaperId::from("p1"),
947            "lars".into(),
948            "doomed".into(),
949            Anchor::default(),
950        );
951        repo.create(&solo).unwrap();
952        repo.soft_delete(solo.id.as_str()).unwrap();
953        assert_eq!(repo.count_unread("lars", None).unwrap(), 0);
954    }
955
956    #[test]
957    fn independent_readers_track_state_independently() {
958        let db = fresh_db_with_paper();
959        let repo = SqliteAnnotationRepository::new(db);
960        let a = sample_root();
961        repo.create(&a).unwrap();
962        repo.mark_seen(&[a.id.as_str()], "lars").unwrap();
963        assert!(repo.list_unread("lars", Some("p1")).unwrap().is_empty());
964        assert_eq!(repo.list_unread("claude", Some("p1")).unwrap().len(), 1);
965    }
966
967    #[test]
968    fn resolver_handles_multibyte_chars() {
969        // U+2019 (curly apostrophe) is 3 bytes / 1 char.
970        let text = "D\u{2019}Ippolito wrote that...";
971        let quote = "D\u{2019}Ippolito";
972        let mut a = Anchor {
973            char_range: Some((0, quote.chars().count())),
974            quote: Some(quote.into()),
975            ..Anchor::default()
976        };
977        assert_eq!(resolve_anchor(&mut a, text), AnchorStatus::Ok);
978    }
979
980    // ---- #96 multi-selector resolver tests ----
981
982    #[test]
983    fn resolver_uses_prefix_to_disambiguate_collision() {
984        // "the model" appears twice; suffix " was trained" picks the second.
985        let text = "Initially the model failed. Then the model was trained on more data.";
986        let mut a = Anchor {
987            char_range: None,
988            quote: Some("the model".into()),
989            prefix: None,
990            suffix: Some(" was trained".into()),
991            ..Anchor::default()
992        };
993        assert_eq!(resolve_anchor(&mut a, text), AnchorStatus::Drifted);
994        let (s, e) = a.char_range.unwrap();
995        assert_eq!(&text[s..e], "the model");
996        // Specifically the *second* occurrence.
997        assert!(s > 20, "expected the second occurrence at s>20, got s={s}");
998    }
999
1000    #[test]
1001    fn resolver_falls_back_to_fuzzy_on_minor_edit() {
1002        // Quote was "the network was deep"; publisher edited to "the
1003        // network was very deep" — substring fails, fuzzy still hits.
1004        let text = "We argued the network was very deep enough to overfit.";
1005        let mut a = Anchor {
1006            char_range: None,
1007            quote: Some("the network was deep".into()),
1008            ..Anchor::default()
1009        };
1010        // Use a permissive threshold so the test isn't sensitive to
1011        // strsim version drift.
1012        let s = resolve_anchor_with_threshold(&mut a, text, 0.85);
1013        assert_eq!(
1014            s,
1015            AnchorStatus::Drifted,
1016            "fuzzy match should drift, got {s:?}"
1017        );
1018    }
1019
1020    #[test]
1021    fn resolver_returns_orphan_when_offsets_oob_and_quote_absent() {
1022        // char_range out of bounds, quote not present in text — must
1023        // return Orphan instead of panicking. (#96 gap 4)
1024        let mut a = Anchor {
1025            char_range: Some((9000, 9100)),
1026            quote: Some("vanished".into()),
1027            ..Anchor::default()
1028        };
1029        assert_eq!(
1030            resolve_anchor(&mut a, "the small text"),
1031            AnchorStatus::Orphan
1032        );
1033    }
1034
1035    #[test]
1036    fn resolver_uses_sentence_id_when_quote_unfindable() {
1037        use scitadel_core::models::sentence_id;
1038        // Sentence content preserved (same words, different
1039        // case/whitespace). Quote string is wholly absent from the
1040        // new text so substring + fuzzy fail; sentence-id rescues.
1041        let original_sentence = "The Transformer Architecture relies on self-attention.";
1042        let new_text = "Intro. the   transformer architecture relies on self-attention. Outro.";
1043        let mut a = Anchor {
1044            char_range: None,
1045            // Bypasses substring + fuzzy.
1046            quote: Some("ZZZ-not-in-new-text-ZZZ".into()),
1047            sentence_id: Some(sentence_id(original_sentence)),
1048            ..Anchor::default()
1049        };
1050        let s = resolve_anchor(&mut a, new_text);
1051        assert_eq!(
1052            s,
1053            AnchorStatus::Drifted,
1054            "sentence-id rescue should mark Drifted, got {s:?}"
1055        );
1056        let (start, end) = a.char_range.unwrap();
1057        let resolved: String = new_text.chars().skip(start).take(end - start).collect();
1058        assert!(
1059            resolved.contains("transformer architecture"),
1060            "expected re-anchor to the matching sentence; got {resolved:?}"
1061        );
1062    }
1063}