1use chrono::{DateTime, Utc};
14use rusqlite::{OptionalExtension, params};
15use scitadel_core::models::{Anchor, AnchorStatus, Annotation, AnnotationId, PaperId, QuestionId};
16
17use crate::error::DbError;
18use crate::sqlite::Database;
19
/// Repository for rows of the `annotations` table and the per-reader
/// `annotation_reads` table, backed by a [`Database`] handle.
///
/// Cloning the repository clones the wrapped `Database` value.
#[derive(Clone)]
pub struct SqliteAnnotationRepository {
    /// Handle used to obtain a connection for every operation.
    db: Database,
}
24
25impl SqliteAnnotationRepository {
26 pub fn new(db: Database) -> Self {
27 Self { db }
28 }
29
30 pub fn create(&self, annotation: &Annotation) -> Result<(), DbError> {
33 let conn = self.db.conn()?;
34 Self::insert_via(&conn, annotation)
35 }
36
37 pub fn create_in_tx(
41 tx: &rusqlite::Transaction<'_>,
42 annotation: &Annotation,
43 ) -> Result<(), DbError> {
44 Self::insert_via(tx, annotation)
45 }
46
47 fn insert_via(conn: &rusqlite::Connection, annotation: &Annotation) -> Result<(), DbError> {
48 conn.execute(
49 "INSERT INTO annotations
50 (id, parent_id, paper_id, question_id,
51 char_start, char_end, quote, prefix, suffix,
52 sentence_id, source_version, anchor_status,
53 note, color, tags_json, author,
54 created_at, updated_at, deleted_at)
55 VALUES (?1, ?2, ?3, ?4,
56 ?5, ?6, ?7, ?8, ?9,
57 ?10, ?11, ?12,
58 ?13, ?14, ?15, ?16,
59 ?17, ?18, ?19)",
60 params![
61 annotation.id.as_str(),
62 annotation.parent_id.as_ref().map(AnnotationId::as_str),
63 annotation.paper_id.as_str(),
64 annotation.question_id.as_ref().map(QuestionId::as_str),
65 annotation.anchor.char_range.map(|(s, _)| s as i64),
66 annotation.anchor.char_range.map(|(_, e)| e as i64),
67 annotation.anchor.quote,
68 annotation.anchor.prefix,
69 annotation.anchor.suffix,
70 annotation.anchor.sentence_id,
71 annotation.anchor.source_version,
72 annotation.anchor.status.as_str(),
73 annotation.note,
74 annotation.color,
75 serde_json::to_string(&annotation.tags).unwrap_or_else(|_| "[]".into()),
76 annotation.author,
77 annotation.created_at.to_rfc3339(),
78 annotation.updated_at.to_rfc3339(),
79 annotation.deleted_at.map(|d| d.to_rfc3339()),
80 ],
81 )?;
82 Ok(())
83 }
84
85 pub fn get(&self, id: &str) -> Result<Option<Annotation>, DbError> {
87 let conn = self.db.conn()?;
88 let mut stmt =
89 conn.prepare("SELECT * FROM annotations WHERE id = ?1 AND deleted_at IS NULL")?;
90 let out = stmt.query_row(params![id], row_to_annotation).optional()?;
91 Ok(out)
92 }
93
94 pub fn list_by_paper(&self, paper_id: &str) -> Result<Vec<Annotation>, DbError> {
96 let conn = self.db.conn()?;
97 let mut stmt = conn.prepare(
98 "SELECT * FROM annotations
99 WHERE paper_id = ?1 AND deleted_at IS NULL
100 ORDER BY created_at ASC",
101 )?;
102 let rows = stmt.query_map(params![paper_id], row_to_annotation)?;
103 Ok(rows.filter_map(Result::ok).collect())
104 }
105
106 pub fn list_replies(&self, parent_id: &str) -> Result<Vec<Annotation>, DbError> {
108 let conn = self.db.conn()?;
109 let mut stmt = conn.prepare(
110 "SELECT * FROM annotations
111 WHERE parent_id = ?1 AND deleted_at IS NULL
112 ORDER BY created_at ASC",
113 )?;
114 let rows = stmt.query_map(params![parent_id], row_to_annotation)?;
115 Ok(rows.filter_map(Result::ok).collect())
116 }
117
118 pub fn update_note(
121 &self,
122 id: &str,
123 note: &str,
124 color: Option<&str>,
125 tags: &[String],
126 ) -> Result<(), DbError> {
127 let conn = self.db.conn()?;
128 conn.execute(
129 "UPDATE annotations
130 SET note = ?1, color = ?2, tags_json = ?3, updated_at = ?4
131 WHERE id = ?5",
132 params![
133 note,
134 color,
135 serde_json::to_string(tags).unwrap_or_else(|_| "[]".into()),
136 Utc::now().to_rfc3339(),
137 id,
138 ],
139 )?;
140 Ok(())
141 }
142
143 pub fn update_anchor(&self, id: &str, anchor: &Anchor) -> Result<(), DbError> {
146 let conn = self.db.conn()?;
147 conn.execute(
148 "UPDATE annotations
149 SET char_start = ?1, char_end = ?2,
150 anchor_status = ?3, updated_at = ?4
151 WHERE id = ?5",
152 params![
153 anchor.char_range.map(|(s, _)| s as i64),
154 anchor.char_range.map(|(_, e)| e as i64),
155 anchor.status.as_str(),
156 Utc::now().to_rfc3339(),
157 id,
158 ],
159 )?;
160 Ok(())
161 }
162
163 pub fn soft_delete(&self, id: &str) -> Result<(), DbError> {
166 let conn = self.db.conn()?;
167 conn.execute(
168 "UPDATE annotations SET deleted_at = ?1 WHERE id = ?2",
169 params![Utc::now().to_rfc3339(), id],
170 )?;
171 Ok(())
172 }
173
174 pub fn mark_seen(&self, annotation_ids: &[&str], reader: &str) -> Result<(), DbError> {
177 if annotation_ids.is_empty() {
178 return Ok(());
179 }
180 let mut conn = self.db.conn()?;
181 let tx = conn.transaction()?;
182 let now = Utc::now().to_rfc3339();
183 for id in annotation_ids {
184 tx.execute(
185 "INSERT INTO annotation_reads (annotation_id, reader, seen_at)
186 VALUES (?1, ?2, ?3)
187 ON CONFLICT(annotation_id, reader) DO UPDATE SET seen_at = excluded.seen_at",
188 params![id, reader, now],
189 )?;
190 }
191 tx.commit()?;
192 Ok(())
193 }
194
195 pub fn mark_thread_seen(&self, root_id: &str, reader: &str) -> Result<(), DbError> {
198 let replies = self.list_replies(root_id)?;
199 let mut ids: Vec<&str> = replies.iter().map(|a| a.id.as_str()).collect();
200 ids.push(root_id);
201 self.mark_seen(&ids, reader)
202 }
203
204 pub fn list_unread(
210 &self,
211 reader: &str,
212 paper_id: Option<&str>,
213 ) -> Result<Vec<Annotation>, DbError> {
214 let conn = self.db.conn()?;
215 let (sql, rows) = if let Some(pid) = paper_id {
216 let mut stmt = conn.prepare(
217 "SELECT a.* FROM annotations a
218 LEFT JOIN annotation_reads r
219 ON r.annotation_id = a.id AND r.reader = ?1
220 WHERE a.paper_id = ?2
221 AND a.deleted_at IS NULL
222 AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)
223 ORDER BY a.created_at ASC",
224 )?;
225 let rows = stmt
226 .query_map(params![reader, pid], row_to_annotation)?
227 .filter_map(Result::ok)
228 .collect::<Vec<_>>();
229 ("scoped", rows)
230 } else {
231 let mut stmt = conn.prepare(
232 "SELECT a.* FROM annotations a
233 LEFT JOIN annotation_reads r
234 ON r.annotation_id = a.id AND r.reader = ?1
235 WHERE a.deleted_at IS NULL
236 AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)
237 ORDER BY a.created_at ASC",
238 )?;
239 let rows = stmt
240 .query_map(params![reader], row_to_annotation)?
241 .filter_map(Result::ok)
242 .collect::<Vec<_>>();
243 ("all", rows)
244 };
245 let _ = sql; Ok(rows)
247 }
248
249 pub fn papers_with_unread(
253 &self,
254 reader: &str,
255 ) -> Result<std::collections::HashSet<String>, DbError> {
256 let conn = self.db.conn()?;
257 let mut stmt = conn.prepare(
258 "SELECT DISTINCT a.paper_id FROM annotations a
259 LEFT JOIN annotation_reads r
260 ON r.annotation_id = a.id AND r.reader = ?1
261 WHERE a.deleted_at IS NULL
262 AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)",
263 )?;
264 let rows = stmt
265 .query_map(params![reader], |row| row.get::<_, String>(0))?
266 .filter_map(Result::ok);
267 Ok(rows.collect())
268 }
269
270 pub fn count_unread(&self, reader: &str, paper_id: Option<&str>) -> Result<i64, DbError> {
275 let conn = self.db.conn()?;
276 let n: i64 = if let Some(pid) = paper_id {
277 conn.query_row(
278 "SELECT COUNT(*) FROM annotations a
279 LEFT JOIN annotation_reads r
280 ON r.annotation_id = a.id AND r.reader = ?1
281 WHERE a.paper_id = ?2
282 AND a.deleted_at IS NULL
283 AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)",
284 params![reader, pid],
285 |row| row.get(0),
286 )?
287 } else {
288 conn.query_row(
289 "SELECT COUNT(*) FROM annotations a
290 LEFT JOIN annotation_reads r
291 ON r.annotation_id = a.id AND r.reader = ?1
292 WHERE a.deleted_at IS NULL
293 AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)",
294 params![reader],
295 |row| row.get(0),
296 )?
297 };
298 Ok(n)
299 }
300}
301
302fn row_to_annotation(row: &rusqlite::Row) -> rusqlite::Result<Annotation> {
303 let char_start: Option<i64> = row.get("char_start")?;
304 let char_end: Option<i64> = row.get("char_end")?;
305 let char_range = match (char_start, char_end) {
306 (Some(s), Some(e)) => Some((s as usize, e as usize)),
307 _ => None,
308 };
309 let anchor_status_str: Option<String> = row.get("anchor_status")?;
310 let anchor = Anchor {
311 char_range,
312 quote: row.get("quote")?,
313 prefix: row.get("prefix")?,
314 suffix: row.get("suffix")?,
315 sentence_id: row.get("sentence_id")?,
316 source_version: row.get("source_version")?,
317 status: anchor_status_str
318 .as_deref()
319 .and_then(AnchorStatus::parse)
320 .unwrap_or_default(),
321 };
322
323 let tags_json: String = row.get("tags_json")?;
324 let tags: Vec<String> = serde_json::from_str(&tags_json).unwrap_or_default();
325
326 let parent_id: Option<String> = row.get("parent_id")?;
327 let question_id: Option<String> = row.get("question_id")?;
328 let created_at: String = row.get("created_at")?;
329 let updated_at: String = row.get("updated_at")?;
330 let deleted_at: Option<String> = row.get("deleted_at")?;
331
332 Ok(Annotation {
333 id: AnnotationId::from(row.get::<_, String>("id")?),
334 parent_id: parent_id.map(AnnotationId::from),
335 paper_id: PaperId::from(row.get::<_, String>("paper_id")?),
336 question_id: question_id.map(QuestionId::from),
337 anchor,
338 note: row.get("note")?,
339 color: row.get("color")?,
340 tags,
341 author: row.get("author")?,
342 created_at: parse_dt(&created_at),
343 updated_at: parse_dt(&updated_at),
344 deleted_at: deleted_at.as_deref().map(parse_dt),
345 })
346}
347
348fn parse_dt(s: &str) -> DateTime<Utc> {
349 DateTime::parse_from_rfc3339(s).map_or_else(|_| Utc::now(), |dt| dt.with_timezone(&Utc))
350}
351
/// Similarity threshold that [`resolve_anchor`] passes to
/// [`resolve_anchor_with_threshold`]. In the fuzzy step a window of text is
/// accepted only when its Jaro-Winkler score against the quote is at least
/// this value.
pub const FUZZY_THRESHOLD: f64 = 0.9;
356
/// Re-resolves `anchor` against `text` using the default [`FUZZY_THRESHOLD`].
///
/// Thin wrapper over [`resolve_anchor_with_threshold`]: `anchor` may be
/// updated in place, and the returned status is the one stored in
/// `anchor.status`.
pub fn resolve_anchor(anchor: &mut Anchor, text: &str) -> AnchorStatus {
    resolve_anchor_with_threshold(anchor, text, FUZZY_THRESHOLD)
}
379
380pub fn resolve_anchor_with_threshold(
381 anchor: &mut Anchor,
382 text: &str,
383 fuzzy_threshold: f64,
384) -> AnchorStatus {
385 if anchor.is_imported_synthetic() {
393 anchor.status = AnchorStatus::Ok;
394 return AnchorStatus::Ok;
395 }
396
397 if anchor.is_paper_note() {
404 anchor.status = AnchorStatus::Ok;
405 return AnchorStatus::Ok;
406 }
407
408 if let (Some((start, end)), Some(quote)) = (anchor.char_range, anchor.quote.as_ref())
410 && let Some(slice) = char_slice(text, start, end)
411 && &slice == quote
412 {
413 anchor.status = AnchorStatus::Ok;
414 return AnchorStatus::Ok;
415 }
416
417 if let Some(quote) = anchor.quote.as_ref()
419 && let Some((sc, ec)) = find_with_context(
420 text,
421 quote,
422 anchor.prefix.as_deref(),
423 anchor.suffix.as_deref(),
424 )
425 {
426 anchor.char_range = Some((sc, ec));
427 anchor.status = AnchorStatus::Drifted;
428 return AnchorStatus::Drifted;
429 }
430
431 if let Some(quote) = anchor.quote.as_ref()
433 && let Some((sc, ec)) = fuzzy_find(text, quote, fuzzy_threshold)
434 {
435 anchor.char_range = Some((sc, ec));
436 anchor.status = AnchorStatus::Drifted;
437 return AnchorStatus::Drifted;
438 }
439
440 if let Some(sid) = anchor.sentence_id.as_ref()
442 && let Some((sc, ec)) = find_sentence_by_id(text, sid)
443 {
444 anchor.char_range = Some((sc, ec));
445 anchor.status = AnchorStatus::Drifted;
446 return AnchorStatus::Drifted;
447 }
448
449 anchor.status = AnchorStatus::Orphan;
450 AnchorStatus::Orphan
451}
452
/// Returns the characters of `text` in the half-open char range
/// `start..end` as an owned string.
///
/// Offsets count `char`s, not bytes. Returns `None` when `end < start` or
/// when `text` has fewer than `end` characters; an empty range always
/// succeeds with an empty string.
fn char_slice(text: &str, start: usize, end: usize) -> Option<String> {
    let wanted = end.checked_sub(start)?;
    let mut remaining = text.chars().skip(start);
    let mut out = String::new();
    for _ in 0..wanted {
        // Running out of characters means the range overshoots the text.
        out.push(remaining.next()?);
    }
    Some(out)
}
468
/// Finds every non-overlapping occurrence of `quote` in `text`, scanning
/// left to right, and returns each as a half-open `(start, end)` range in
/// `char` offsets (not bytes).
///
/// An empty `quote` yields no occurrences.
fn find_all(text: &str, quote: &str) -> Vec<(usize, usize)> {
    if quote.is_empty() {
        return Vec::new();
    }
    let quote_chars = quote.chars().count();
    let mut out = Vec::new();
    // Running byte -> char conversion: only the gap since the previous hit is
    // counted, so the whole scan stays linear in the length of `text`.
    let mut seen_bytes = 0;
    let mut seen_chars = 0;
    for (hit_byte, _) in text.match_indices(quote) {
        seen_chars += text[seen_bytes..hit_byte].chars().count();
        seen_bytes = hit_byte;
        out.push((seen_chars, seen_chars + quote_chars));
    }
    out
}
486
487fn find_with_context(
492 text: &str,
493 quote: &str,
494 prefix: Option<&str>,
495 suffix: Option<&str>,
496) -> Option<(usize, usize)> {
497 let occurrences = find_all(text, quote);
498 if occurrences.is_empty() {
499 return None;
500 }
501 if occurrences.len() == 1 || (prefix.is_none() && suffix.is_none()) {
502 return Some(occurrences[0]);
503 }
504
505 let chars: Vec<char> = text.chars().collect();
506 occurrences
507 .into_iter()
508 .max_by_key(|&(sc, ec)| context_score(&chars, sc, ec, prefix, suffix))
509}
510
/// Scores how well the text around the char range `start..end` of `chars`
/// matches the expected `prefix` and `suffix`.
///
/// The score is the number of characters immediately before `start` that
/// match the end of `prefix` (compared backwards, stopping at the first
/// mismatch), plus the number of characters from `end` onwards that match
/// the start of `suffix` (compared forwards, stopping at the first mismatch).
/// A missing prefix or suffix contributes 0, and so does an offset that lies
/// beyond the end of `chars`.
fn context_score(
    chars: &[char],
    start: usize,
    end: usize,
    prefix: Option<&str>,
    suffix: Option<&str>,
) -> i64 {
    // Length of the common run at the front of two char sequences.
    fn matching_run<'a>(
        actual: impl Iterator<Item = &'a char>,
        expected: impl Iterator<Item = char>,
    ) -> i64 {
        let run = actual
            .zip(expected)
            .take_while(|(have, want)| **have == *want)
            .count();
        i64::try_from(run).unwrap_or(i64::MAX)
    }

    let mut score = 0i64;
    if let Some(p) = prefix {
        // `get` instead of indexing: an out-of-range `start` scores 0
        // instead of panicking.
        let before = chars.get(..start).unwrap_or(&[]);
        score += matching_run(before.iter().rev(), p.chars().rev());
    }
    if let Some(s) = suffix {
        let after = chars.get(end..).unwrap_or(&[]);
        score += matching_run(after.iter(), s.chars());
    }
    score
}
547
548fn fuzzy_find(text: &str, quote: &str, threshold: f64) -> Option<(usize, usize)> {
552 if quote.is_empty() {
553 return None;
554 }
555 let chars: Vec<char> = text.chars().collect();
556 let qlen = quote.chars().count();
557 if chars.len() < qlen {
558 return None;
559 }
560
561 let mut best: Option<(usize, f64)> = None;
562 for start in 0..=chars.len() - qlen {
563 let window: String = chars[start..start + qlen].iter().collect();
564 let score = strsim::jaro_winkler(&window, quote);
565 if score >= threshold && best.is_none_or(|(_, b)| score > b) {
566 best = Some((start, score));
567 }
568 }
569 best.map(|(start, _)| (start, start + qlen))
570}
571
572fn find_sentence_by_id(text: &str, sid: &str) -> Option<(usize, usize)> {
577 let chars: Vec<char> = text.chars().collect();
578 let mut sentence_start_char = 0;
579 let mut i = 0;
580 while i < chars.len() {
581 let ch = chars[i];
582 let is_terminator = matches!(ch, '.' | '!' | '?');
583 let is_end = i + 1 == chars.len();
584 if is_terminator || is_end {
585 let end = if is_end { chars.len() } else { i + 1 };
586 let sentence: String = chars[sentence_start_char..end].iter().collect();
587 let trimmed = sentence.trim();
588 if !trimmed.is_empty() && scitadel_core::models::sentence_id(trimmed) == sid {
589 let leading_ws = sentence.chars().take_while(|c| c.is_whitespace()).count();
591 let trailing_ws = sentence
592 .chars()
593 .rev()
594 .take_while(|c| c.is_whitespace())
595 .count();
596 let trimmed_start = sentence_start_char + leading_ws;
597 let trimmed_end = end - trailing_ws;
598 if trimmed_end > trimmed_start {
599 return Some((trimmed_start, trimmed_end));
600 }
601 }
602 sentence_start_char = end;
604 }
605 i += 1;
606 }
607 None
608}
609
/// Tests for the repository (against an in-memory, migrated database) and
/// for the anchor resolver (pure functions over text).
#[cfg(test)]
mod tests {
    use super::*;
    use scitadel_core::models::Annotation;

    /// Opens a migrated in-memory database containing a single paper `p1`.
    fn fresh_db_with_paper() -> Database {
        let db = Database::open_in_memory().unwrap();
        db.migrate().unwrap();
        let conn = db.conn().unwrap();
        conn.execute(
            "INSERT INTO papers (id, title, created_at, updated_at)
             VALUES ('p1', 't', datetime('now'), datetime('now'))",
            [],
        )
        .unwrap();
        db
    }

    /// A root annotation on paper `p1`, authored by `lars`.
    fn sample_root() -> Annotation {
        Annotation::new_root(
            PaperId::from("p1"),
            "lars".into(),
            "important passage".into(),
            Anchor {
                char_range: Some((10, 25)),
                quote: Some("neutron energy".into()),
                ..Anchor::default()
            },
        )
    }

    #[test]
    fn annotation_writes_are_offline_safe() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);

        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "seconded".into());
        repo.create(&reply).unwrap();
        repo.update_note(root.id.as_str(), "edited offline", None, &[])
            .unwrap();
        repo.soft_delete(reply.id.as_str()).unwrap();

        let all = repo.list_by_paper("p1").unwrap();
        assert_eq!(all.len(), 1, "root survives; reply tombstoned out");
        assert_eq!(all[0].note, "edited offline");
    }

    #[test]
    fn create_and_get_roundtrip() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let root = sample_root();
        repo.create(&root).unwrap();

        let loaded = repo.get(root.id.as_str()).unwrap().expect("present");
        assert_eq!(loaded.note, "important passage");
        assert_eq!(loaded.anchor.char_range, Some((10, 25)));
        assert_eq!(loaded.anchor.quote.as_deref(), Some("neutron energy"));
    }

    #[test]
    fn replies_threaded_under_root() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "see fig 4".into());
        repo.create(&reply).unwrap();

        let replies = repo.list_replies(root.id.as_str()).unwrap();
        assert_eq!(replies.len(), 1);
        assert_eq!(replies[0].note, "see fig 4");
    }

    #[test]
    fn soft_delete_hides_from_listings_but_thread_preserved() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "yep".into());
        repo.create(&reply).unwrap();

        repo.soft_delete(root.id.as_str()).unwrap();

        assert!(repo.get(root.id.as_str()).unwrap().is_none());
        assert!(
            repo.list_by_paper("p1")
                .unwrap()
                .iter()
                .all(|a| a.id != root.id)
        );
        // Replies stay listed even though their parent is soft-deleted.
        let replies = repo.list_replies(root.id.as_str()).unwrap();
        assert_eq!(replies.len(), 1);
    }

    #[test]
    fn update_note_persists() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let root = sample_root();
        repo.create(&root).unwrap();

        repo.update_note(
            root.id.as_str(),
            "new note",
            Some("blue"),
            &["tag1".into(), "tag2".into()],
        )
        .unwrap();

        let loaded = repo.get(root.id.as_str()).unwrap().unwrap();
        assert_eq!(loaded.note, "new note");
        assert_eq!(loaded.color.as_deref(), Some("blue"));
        assert_eq!(loaded.tags, vec!["tag1".to_string(), "tag2".to_string()]);
    }

    #[test]
    fn resolver_ok_when_text_unchanged() {
        let mut a = Anchor {
            char_range: Some((1, 4)),
            quote: Some("bcd".into()),
            ..Anchor::default()
        };
        assert_eq!(resolve_anchor(&mut a, "abcde"), AnchorStatus::Ok);
    }

    #[test]
    fn resolver_drifted_when_quote_moved() {
        let mut a = Anchor {
            char_range: Some((1, 4)),
            quote: Some("bcd".into()),
            ..Anchor::default()
        };
        assert_eq!(resolve_anchor(&mut a, "xxabcde"), AnchorStatus::Drifted);
        assert_eq!(a.char_range, Some((3, 6)));
        assert_eq!(a.status, AnchorStatus::Drifted);
    }

    #[test]
    fn resolver_orphan_when_quote_missing() {
        let mut a = Anchor {
            char_range: Some((1, 4)),
            quote: Some("bcd".into()),
            ..Anchor::default()
        };
        assert_eq!(
            resolve_anchor(&mut a, "nothing to see"),
            AnchorStatus::Orphan
        );
    }

    #[test]
    fn resolver_short_circuits_imported_synthetic_anchor_to_ok() {
        let mut a = Anchor {
            sentence_id: Some(scitadel_core::models::imported_sentence_id(
                "smith2024",
                "Reading note about methodology.",
            )),
            ..Anchor::default()
        };
        let status = resolve_anchor(&mut a, "the body of the paper says many things.");
        assert_eq!(status, AnchorStatus::Ok);
        assert_eq!(a.status, AnchorStatus::Ok);
        assert!(!a.is_orphan());
    }

    #[test]
    fn resolver_short_circuits_paper_note_anchor_to_ok() {
        let mut a = Anchor {
            sentence_id: Some(scitadel_core::models::paper_note_sentence_id("p-attn")),
            ..Anchor::default()
        };
        let status = resolve_anchor(&mut a, "irrelevant body text.");
        assert_eq!(status, AnchorStatus::Ok);
        assert_eq!(a.status, AnchorStatus::Ok);
        assert!(!a.is_orphan());
    }

    #[test]
    fn resolver_still_orphans_real_anchors_that_fail() {
        let mut a = Anchor {
            quote: Some("missing quote".into()),
            sentence_id: Some(scitadel_core::models::sentence_id("a real sentence.")),
            ..Anchor::default()
        };
        let status = resolve_anchor(&mut a, "different text without the quote.");
        assert_eq!(status, AnchorStatus::Orphan);
    }

    #[test]
    fn unread_includes_rows_never_seen() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let a = sample_root();
        repo.create(&a).unwrap();
        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert_eq!(unread.len(), 1);
    }

    #[test]
    fn unread_excludes_rows_seen_after_update() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let a = sample_root();
        repo.create(&a).unwrap();
        repo.mark_seen(&[a.id.as_str()], "lars").unwrap();
        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert!(unread.is_empty(), "should be no unread after mark_seen");
    }

    #[test]
    fn unread_reappears_after_annotation_is_updated() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let a = sample_root();
        repo.create(&a).unwrap();
        repo.mark_seen(&[a.id.as_str()], "lars").unwrap();
        // Ensures `updated_at` lands after the recorded `seen_at`.
        std::thread::sleep(std::time::Duration::from_millis(1100));
        repo.update_note(a.id.as_str(), "edited note", None, &[])
            .unwrap();
        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert_eq!(unread.len(), 1, "edit should resurface the row as unread");
    }

    #[test]
    fn mark_thread_seen_covers_root_and_replies() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "follow-up".into());
        repo.create(&reply).unwrap();

        repo.mark_thread_seen(root.id.as_str(), "lars").unwrap();
        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert!(unread.is_empty());
    }

    #[test]
    fn papers_with_unread_returns_distinct_paper_ids() {
        let db = fresh_db_with_paper();
        db.conn()
            .unwrap()
            .execute(
                "INSERT INTO papers (id, title, authors, abstract, created_at, updated_at)
                 VALUES ('p2', 'Other', '[]', '', '2026-04-28T00:00:00Z', '2026-04-28T00:00:00Z')",
                [],
            )
            .unwrap();
        let repo = SqliteAnnotationRepository::new(db);
        let on_p1 = sample_root();
        let on_p2 = Annotation::new_root(
            scitadel_core::models::PaperId::from("p2"),
            "claude".into(),
            "n".into(),
            Anchor::default(),
        );
        repo.create(&on_p1).unwrap();
        let p1_b = Annotation::new_reply(&on_p1, "claude".into(), "follow".into());
        repo.create(&p1_b).unwrap();
        repo.create(&on_p2).unwrap();

        let set = repo.papers_with_unread("lars").unwrap();
        assert_eq!(set.len(), 2, "two distinct paper_ids despite three rows");
        assert!(set.contains("p1"));
        assert!(set.contains("p2"));

        repo.mark_thread_seen(on_p1.id.as_str(), "lars").unwrap();
        let set = repo.papers_with_unread("lars").unwrap();
        assert_eq!(set, std::iter::once("p2".to_string()).collect());
    }

    #[test]
    fn count_unread_matches_list_unread_length() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "follow-up".into());
        repo.create(&reply).unwrap();

        assert_eq!(repo.count_unread("lars", None).unwrap(), 2);
        assert_eq!(repo.count_unread("lars", Some("p1")).unwrap(), 2);

        repo.mark_thread_seen(root.id.as_str(), "lars").unwrap();
        assert_eq!(repo.count_unread("lars", None).unwrap(), 0);
        assert_eq!(repo.count_unread("lars", Some("p1")).unwrap(), 0);

        // A soft-deleted annotation must not count as unread.
        let solo = Annotation::new_root(
            scitadel_core::models::PaperId::from("p1"),
            "lars".into(),
            "doomed".into(),
            Anchor::default(),
        );
        repo.create(&solo).unwrap();
        repo.soft_delete(solo.id.as_str()).unwrap();
        assert_eq!(repo.count_unread("lars", None).unwrap(), 0);
    }

    #[test]
    fn independent_readers_track_state_independently() {
        let db = fresh_db_with_paper();
        let repo = SqliteAnnotationRepository::new(db);
        let a = sample_root();
        repo.create(&a).unwrap();
        repo.mark_seen(&[a.id.as_str()], "lars").unwrap();
        assert!(repo.list_unread("lars", Some("p1")).unwrap().is_empty());
        assert_eq!(repo.list_unread("claude", Some("p1")).unwrap().len(), 1);
    }

    #[test]
    fn resolver_handles_multibyte_chars() {
        let text = "D\u{2019}Ippolito wrote that...";
        let quote = "D\u{2019}Ippolito";
        let mut a = Anchor {
            char_range: Some((0, quote.chars().count())),
            quote: Some(quote.into()),
            ..Anchor::default()
        };
        assert_eq!(resolve_anchor(&mut a, text), AnchorStatus::Ok);
    }

    #[test]
    fn resolver_uses_prefix_to_disambiguate_collision() {
        let text = "Initially the model failed. Then the model was trained on more data.";
        let mut a = Anchor {
            char_range: None,
            quote: Some("the model".into()),
            prefix: None,
            suffix: Some(" was trained".into()),
            ..Anchor::default()
        };
        assert_eq!(resolve_anchor(&mut a, text), AnchorStatus::Drifted);
        let (s, e) = a.char_range.unwrap();
        // The resolver reports char offsets, so slice by chars rather than bytes.
        let resolved: String = text.chars().skip(s).take(e - s).collect();
        assert_eq!(resolved, "the model");
        assert!(s > 20, "expected the second occurrence at s>20, got s={s}");
    }

    #[test]
    fn resolver_falls_back_to_fuzzy_on_minor_edit() {
        let text = "We argued the network was very deep enough to overfit.";
        let mut a = Anchor {
            char_range: None,
            quote: Some("the network was deep".into()),
            ..Anchor::default()
        };
        let s = resolve_anchor_with_threshold(&mut a, text, 0.85);
        assert_eq!(
            s,
            AnchorStatus::Drifted,
            "fuzzy match should drift, got {s:?}"
        );
    }

    #[test]
    fn resolver_returns_orphan_when_offsets_oob_and_quote_absent() {
        let mut a = Anchor {
            char_range: Some((9000, 9100)),
            quote: Some("vanished".into()),
            ..Anchor::default()
        };
        assert_eq!(
            resolve_anchor(&mut a, "the small text"),
            AnchorStatus::Orphan
        );
    }

    #[test]
    fn resolver_uses_sentence_id_when_quote_unfindable() {
        use scitadel_core::models::sentence_id;
        let original_sentence = "The Transformer Architecture relies on self-attention.";
        let new_text = "Intro. the transformer architecture relies on self-attention. Outro.";
        let mut a = Anchor {
            char_range: None,
            quote: Some("ZZZ-not-in-new-text-ZZZ".into()),
            sentence_id: Some(sentence_id(original_sentence)),
            ..Anchor::default()
        };
        let s = resolve_anchor(&mut a, new_text);
        assert_eq!(
            s,
            AnchorStatus::Drifted,
            "sentence-id rescue should mark Drifted, got {s:?}"
        );
        let (start, end) = a.char_range.unwrap();
        let resolved: String = new_text.chars().skip(start).take(end - start).collect();
        assert!(
            resolved.contains("transformer architecture"),
            "expected re-anchor to the matching sentence; got {resolved:?}"
        );
    }
}