1use chrono::{DateTime, Utc};
14use rusqlite::{OptionalExtension, params};
15use scitadel_core::models::{Anchor, AnchorStatus, Annotation, AnnotationId, PaperId, QuestionId};
16
17use crate::error::DbError;
18use crate::sqlite::Database;
19
20#[derive(Clone)]
21pub struct SqliteAnnotationRepository {
22 db: Database,
23}
24
25impl SqliteAnnotationRepository {
26 pub fn new(db: Database) -> Self {
27 Self { db }
28 }
29
30 pub fn create(&self, annotation: &Annotation) -> Result<(), DbError> {
33 let conn = self.db.conn()?;
34 conn.execute(
35 "INSERT INTO annotations
36 (id, parent_id, paper_id, question_id,
37 char_start, char_end, quote, prefix, suffix,
38 sentence_id, source_version, anchor_status,
39 note, color, tags_json, author,
40 created_at, updated_at, deleted_at)
41 VALUES (?1, ?2, ?3, ?4,
42 ?5, ?6, ?7, ?8, ?9,
43 ?10, ?11, ?12,
44 ?13, ?14, ?15, ?16,
45 ?17, ?18, ?19)",
46 params![
47 annotation.id.as_str(),
48 annotation.parent_id.as_ref().map(AnnotationId::as_str),
49 annotation.paper_id.as_str(),
50 annotation.question_id.as_ref().map(QuestionId::as_str),
51 annotation.anchor.char_range.map(|(s, _)| s as i64),
52 annotation.anchor.char_range.map(|(_, e)| e as i64),
53 annotation.anchor.quote,
54 annotation.anchor.prefix,
55 annotation.anchor.suffix,
56 annotation.anchor.sentence_id,
57 annotation.anchor.source_version,
58 annotation.anchor.status.as_str(),
59 annotation.note,
60 annotation.color,
61 serde_json::to_string(&annotation.tags).unwrap_or_else(|_| "[]".into()),
62 annotation.author,
63 annotation.created_at.to_rfc3339(),
64 annotation.updated_at.to_rfc3339(),
65 annotation.deleted_at.map(|d| d.to_rfc3339()),
66 ],
67 )?;
68 Ok(())
69 }
70
71 pub fn get(&self, id: &str) -> Result<Option<Annotation>, DbError> {
73 let conn = self.db.conn()?;
74 let mut stmt =
75 conn.prepare("SELECT * FROM annotations WHERE id = ?1 AND deleted_at IS NULL")?;
76 let out = stmt.query_row(params![id], row_to_annotation).optional()?;
77 Ok(out)
78 }
79
80 pub fn list_by_paper(&self, paper_id: &str) -> Result<Vec<Annotation>, DbError> {
82 let conn = self.db.conn()?;
83 let mut stmt = conn.prepare(
84 "SELECT * FROM annotations
85 WHERE paper_id = ?1 AND deleted_at IS NULL
86 ORDER BY created_at ASC",
87 )?;
88 let rows = stmt.query_map(params![paper_id], row_to_annotation)?;
89 Ok(rows.filter_map(Result::ok).collect())
90 }
91
92 pub fn list_replies(&self, parent_id: &str) -> Result<Vec<Annotation>, DbError> {
94 let conn = self.db.conn()?;
95 let mut stmt = conn.prepare(
96 "SELECT * FROM annotations
97 WHERE parent_id = ?1 AND deleted_at IS NULL
98 ORDER BY created_at ASC",
99 )?;
100 let rows = stmt.query_map(params![parent_id], row_to_annotation)?;
101 Ok(rows.filter_map(Result::ok).collect())
102 }
103
104 pub fn update_note(
107 &self,
108 id: &str,
109 note: &str,
110 color: Option<&str>,
111 tags: &[String],
112 ) -> Result<(), DbError> {
113 let conn = self.db.conn()?;
114 conn.execute(
115 "UPDATE annotations
116 SET note = ?1, color = ?2, tags_json = ?3, updated_at = ?4
117 WHERE id = ?5",
118 params![
119 note,
120 color,
121 serde_json::to_string(tags).unwrap_or_else(|_| "[]".into()),
122 Utc::now().to_rfc3339(),
123 id,
124 ],
125 )?;
126 Ok(())
127 }
128
129 pub fn update_anchor(&self, id: &str, anchor: &Anchor) -> Result<(), DbError> {
132 let conn = self.db.conn()?;
133 conn.execute(
134 "UPDATE annotations
135 SET char_start = ?1, char_end = ?2,
136 anchor_status = ?3, updated_at = ?4
137 WHERE id = ?5",
138 params![
139 anchor.char_range.map(|(s, _)| s as i64),
140 anchor.char_range.map(|(_, e)| e as i64),
141 anchor.status.as_str(),
142 Utc::now().to_rfc3339(),
143 id,
144 ],
145 )?;
146 Ok(())
147 }
148
149 pub fn soft_delete(&self, id: &str) -> Result<(), DbError> {
152 let conn = self.db.conn()?;
153 conn.execute(
154 "UPDATE annotations SET deleted_at = ?1 WHERE id = ?2",
155 params![Utc::now().to_rfc3339(), id],
156 )?;
157 Ok(())
158 }
159
160 pub fn mark_seen(&self, annotation_ids: &[&str], reader: &str) -> Result<(), DbError> {
163 if annotation_ids.is_empty() {
164 return Ok(());
165 }
166 let mut conn = self.db.conn()?;
167 let tx = conn.transaction()?;
168 let now = Utc::now().to_rfc3339();
169 for id in annotation_ids {
170 tx.execute(
171 "INSERT INTO annotation_reads (annotation_id, reader, seen_at)
172 VALUES (?1, ?2, ?3)
173 ON CONFLICT(annotation_id, reader) DO UPDATE SET seen_at = excluded.seen_at",
174 params![id, reader, now],
175 )?;
176 }
177 tx.commit()?;
178 Ok(())
179 }
180
181 pub fn mark_thread_seen(&self, root_id: &str, reader: &str) -> Result<(), DbError> {
184 let replies = self.list_replies(root_id)?;
185 let mut ids: Vec<&str> = replies.iter().map(|a| a.id.as_str()).collect();
186 ids.push(root_id);
187 self.mark_seen(&ids, reader)
188 }
189
190 pub fn list_unread(
196 &self,
197 reader: &str,
198 paper_id: Option<&str>,
199 ) -> Result<Vec<Annotation>, DbError> {
200 let conn = self.db.conn()?;
201 let (sql, rows) = if let Some(pid) = paper_id {
202 let mut stmt = conn.prepare(
203 "SELECT a.* FROM annotations a
204 LEFT JOIN annotation_reads r
205 ON r.annotation_id = a.id AND r.reader = ?1
206 WHERE a.paper_id = ?2
207 AND a.deleted_at IS NULL
208 AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)
209 ORDER BY a.created_at ASC",
210 )?;
211 let rows = stmt
212 .query_map(params![reader, pid], row_to_annotation)?
213 .filter_map(Result::ok)
214 .collect::<Vec<_>>();
215 ("scoped", rows)
216 } else {
217 let mut stmt = conn.prepare(
218 "SELECT a.* FROM annotations a
219 LEFT JOIN annotation_reads r
220 ON r.annotation_id = a.id AND r.reader = ?1
221 WHERE a.deleted_at IS NULL
222 AND (r.seen_at IS NULL OR r.seen_at < a.updated_at)
223 ORDER BY a.created_at ASC",
224 )?;
225 let rows = stmt
226 .query_map(params![reader], row_to_annotation)?
227 .filter_map(Result::ok)
228 .collect::<Vec<_>>();
229 ("all", rows)
230 };
231 let _ = sql; Ok(rows)
233 }
234}
235
236fn row_to_annotation(row: &rusqlite::Row) -> rusqlite::Result<Annotation> {
237 let char_start: Option<i64> = row.get("char_start")?;
238 let char_end: Option<i64> = row.get("char_end")?;
239 let char_range = match (char_start, char_end) {
240 (Some(s), Some(e)) => Some((s as usize, e as usize)),
241 _ => None,
242 };
243 let anchor_status_str: Option<String> = row.get("anchor_status")?;
244 let anchor = Anchor {
245 char_range,
246 quote: row.get("quote")?,
247 prefix: row.get("prefix")?,
248 suffix: row.get("suffix")?,
249 sentence_id: row.get("sentence_id")?,
250 source_version: row.get("source_version")?,
251 status: anchor_status_str
252 .as_deref()
253 .and_then(AnchorStatus::parse)
254 .unwrap_or_default(),
255 };
256
257 let tags_json: String = row.get("tags_json")?;
258 let tags: Vec<String> = serde_json::from_str(&tags_json).unwrap_or_default();
259
260 let parent_id: Option<String> = row.get("parent_id")?;
261 let question_id: Option<String> = row.get("question_id")?;
262 let created_at: String = row.get("created_at")?;
263 let updated_at: String = row.get("updated_at")?;
264 let deleted_at: Option<String> = row.get("deleted_at")?;
265
266 Ok(Annotation {
267 id: AnnotationId::from(row.get::<_, String>("id")?),
268 parent_id: parent_id.map(AnnotationId::from),
269 paper_id: PaperId::from(row.get::<_, String>("paper_id")?),
270 question_id: question_id.map(QuestionId::from),
271 anchor,
272 note: row.get("note")?,
273 color: row.get("color")?,
274 tags,
275 author: row.get("author")?,
276 created_at: parse_dt(&created_at),
277 updated_at: parse_dt(&updated_at),
278 deleted_at: deleted_at.as_deref().map(parse_dt),
279 })
280}
281
282fn parse_dt(s: &str) -> DateTime<Utc> {
283 DateTime::parse_from_rfc3339(s).map_or_else(|_| Utc::now(), |dt| dt.with_timezone(&Utc))
284}
285
/// Similarity threshold that [`resolve_anchor`] hands to
/// [`resolve_anchor_with_threshold`]; a fuzzy candidate must score at least
/// this much to be accepted.
pub const FUZZY_THRESHOLD: f64 = 0.9;
290
291pub fn resolve_anchor(anchor: &mut Anchor, text: &str) -> AnchorStatus {
311 resolve_anchor_with_threshold(anchor, text, FUZZY_THRESHOLD)
312}
313
314pub fn resolve_anchor_with_threshold(
315 anchor: &mut Anchor,
316 text: &str,
317 fuzzy_threshold: f64,
318) -> AnchorStatus {
319 if let (Some((start, end)), Some(quote)) = (anchor.char_range, anchor.quote.as_ref())
321 && let Some(slice) = char_slice(text, start, end)
322 && &slice == quote
323 {
324 anchor.status = AnchorStatus::Ok;
325 return AnchorStatus::Ok;
326 }
327
328 if let Some(quote) = anchor.quote.as_ref()
330 && let Some((sc, ec)) = find_with_context(
331 text,
332 quote,
333 anchor.prefix.as_deref(),
334 anchor.suffix.as_deref(),
335 )
336 {
337 anchor.char_range = Some((sc, ec));
338 anchor.status = AnchorStatus::Drifted;
339 return AnchorStatus::Drifted;
340 }
341
342 if let Some(quote) = anchor.quote.as_ref()
344 && let Some((sc, ec)) = fuzzy_find(text, quote, fuzzy_threshold)
345 {
346 anchor.char_range = Some((sc, ec));
347 anchor.status = AnchorStatus::Drifted;
348 return AnchorStatus::Drifted;
349 }
350
351 if let Some(sid) = anchor.sentence_id.as_ref()
353 && let Some((sc, ec)) = find_sentence_by_id(text, sid)
354 {
355 anchor.char_range = Some((sc, ec));
356 anchor.status = AnchorStatus::Drifted;
357 return AnchorStatus::Drifted;
358 }
359
360 anchor.status = AnchorStatus::Orphan;
361 AnchorStatus::Orphan
362}
363
/// Returns the characters of `text` in the half-open char range
/// `start..end` as an owned string.
///
/// Offsets count `char`s, not bytes. Yields `None` when `end < start` or when
/// `text` has fewer than `end - start` characters from `start` on. An empty
/// range (`start == end`) always yields an empty string.
fn char_slice(text: &str, start: usize, end: usize) -> Option<String> {
    let span = end.checked_sub(start)?;
    let mut picked = String::new();
    let mut taken = 0usize;
    for ch in text.chars().skip(start).take(span) {
        picked.push(ch);
        taken += 1;
    }
    // Fewer characters than requested means the range runs past the text.
    (taken == span).then_some(picked)
}
379
/// Finds every non-overlapping occurrence of `quote` in `text`, left to right.
///
/// Each hit is reported as a half-open range of `char` offsets (not byte
/// offsets). An empty `quote` yields no hits.
fn find_all(text: &str, quote: &str) -> Vec<(usize, usize)> {
    if quote.is_empty() {
        return Vec::new();
    }
    let quote_chars = quote.chars().count();
    // Running byte/char position of the previous hit, so each stretch of text
    // is counted once instead of recounting from the start for every hit.
    let mut byte_cursor = 0;
    let mut char_cursor = 0;
    text.match_indices(quote)
        .map(|(hit_byte, _)| {
            char_cursor += text[byte_cursor..hit_byte].chars().count();
            byte_cursor = hit_byte;
            (char_cursor, char_cursor + quote_chars)
        })
        .collect()
}
397
398fn find_with_context(
403 text: &str,
404 quote: &str,
405 prefix: Option<&str>,
406 suffix: Option<&str>,
407) -> Option<(usize, usize)> {
408 let occurrences = find_all(text, quote);
409 if occurrences.is_empty() {
410 return None;
411 }
412 if occurrences.len() == 1 || (prefix.is_none() && suffix.is_none()) {
413 return Some(occurrences[0]);
414 }
415
416 let chars: Vec<char> = text.chars().collect();
417 occurrences
418 .into_iter()
419 .max_by_key(|&(sc, ec)| context_score(&chars, sc, ec, prefix, suffix))
420}
421
/// Scores how well the text around the char range `start..end` matches the
/// expected context.
///
/// The score is the number of characters directly before `start` that match
/// the end of `prefix` (compared right to left), plus the number of characters
/// from `end` on that match the start of `suffix` (left to right). Counting on
/// each side stops at the first mismatch. Offsets beyond `chars` contribute
/// nothing rather than panicking.
fn context_score(
    chars: &[char],
    start: usize,
    end: usize,
    prefix: Option<&str>,
    suffix: Option<&str>,
) -> i64 {
    // Checked slicing: an out-of-range offset yields an empty neighbourhood.
    let before = chars.get(..start).unwrap_or_default();
    let after = chars.get(end..).unwrap_or_default();

    let prefix_hits = prefix.map_or(0, |p| {
        before
            .iter()
            .rev()
            .zip(p.chars().rev())
            .take_while(|(have, want)| **have == *want)
            .count()
    });
    let suffix_hits = suffix.map_or(0, |s| {
        after
            .iter()
            .zip(s.chars())
            .take_while(|(have, want)| **have == *want)
            .count()
    });

    i64::try_from(prefix_hits + suffix_hits).unwrap_or(i64::MAX)
}
458
459fn fuzzy_find(text: &str, quote: &str, threshold: f64) -> Option<(usize, usize)> {
463 if quote.is_empty() {
464 return None;
465 }
466 let chars: Vec<char> = text.chars().collect();
467 let qlen = quote.chars().count();
468 if chars.len() < qlen {
469 return None;
470 }
471
472 let mut best: Option<(usize, f64)> = None;
473 for start in 0..=chars.len() - qlen {
474 let window: String = chars[start..start + qlen].iter().collect();
475 let score = strsim::jaro_winkler(&window, quote);
476 if score >= threshold && best.is_none_or(|(_, b)| score > b) {
477 best = Some((start, score));
478 }
479 }
480 best.map(|(start, _)| (start, start + qlen))
481}
482
483fn find_sentence_by_id(text: &str, sid: &str) -> Option<(usize, usize)> {
488 let chars: Vec<char> = text.chars().collect();
489 let mut sentence_start_char = 0;
490 let mut i = 0;
491 while i < chars.len() {
492 let ch = chars[i];
493 let is_terminator = matches!(ch, '.' | '!' | '?');
494 let is_end = i + 1 == chars.len();
495 if is_terminator || is_end {
496 let end = if is_end { chars.len() } else { i + 1 };
497 let sentence: String = chars[sentence_start_char..end].iter().collect();
498 let trimmed = sentence.trim();
499 if !trimmed.is_empty() && scitadel_core::models::sentence_id(trimmed) == sid {
500 let leading_ws = sentence.chars().take_while(|c| c.is_whitespace()).count();
502 let trailing_ws = sentence
503 .chars()
504 .rev()
505 .take_while(|c| c.is_whitespace())
506 .count();
507 let trimmed_start = sentence_start_char + leading_ws;
508 let trimmed_end = end - trailing_ws;
509 if trimmed_end > trimmed_start {
510 return Some((trimmed_start, trimmed_end));
511 }
512 }
513 sentence_start_char = end;
515 }
516 i += 1;
517 }
518 None
519}
520
#[cfg(test)]
mod tests {
    use super::*;
    use scitadel_core::models::Annotation;

    /// Opens a migrated in-memory database containing one paper with id `p1`.
    fn fresh_db_with_paper() -> Database {
        let db = Database::open_in_memory().unwrap();
        db.migrate().unwrap();
        db.conn()
            .unwrap()
            .execute(
                "INSERT INTO papers (id, title, created_at, updated_at)
                 VALUES ('p1', 't', datetime('now'), datetime('now'))",
                [],
            )
            .unwrap();
        db
    }

    /// Repository on top of a database from `fresh_db_with_paper`.
    fn fresh_repo() -> SqliteAnnotationRepository {
        SqliteAnnotationRepository::new(fresh_db_with_paper())
    }

    /// Root annotation on paper `p1`, authored by `lars`.
    fn sample_root() -> Annotation {
        let anchor = Anchor {
            char_range: Some((10, 25)),
            quote: Some("neutron energy".into()),
            ..Anchor::default()
        };
        Annotation::new_root(
            PaperId::from("p1"),
            "lars".into(),
            "important passage".into(),
            anchor,
        )
    }

    /// Anchor carrying only a char range and a quote.
    fn ranged_anchor(range: (usize, usize), quote: &str) -> Anchor {
        Anchor {
            char_range: Some(range),
            quote: Some(quote.into()),
            ..Anchor::default()
        }
    }

    // Create, reply, edit and soft-delete against an in-memory database only.
    #[test]
    fn annotation_writes_are_offline_safe() {
        let repo = fresh_repo();

        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "seconded".into());
        repo.create(&reply).unwrap();
        repo.update_note(root.id.as_str(), "edited offline", None, &[])
            .unwrap();
        repo.soft_delete(reply.id.as_str()).unwrap();

        let remaining = repo.list_by_paper("p1").unwrap();
        assert_eq!(remaining.len(), 1, "root survives; reply tombstoned out");
        assert_eq!(remaining[0].note, "edited offline");
    }

    #[test]
    fn create_and_get_roundtrip() {
        let repo = fresh_repo();
        let root = sample_root();
        repo.create(&root).unwrap();

        let fetched = repo.get(root.id.as_str()).unwrap().expect("present");
        assert_eq!(fetched.note, "important passage");
        assert_eq!(fetched.anchor.char_range, Some((10, 25)));
        assert_eq!(fetched.anchor.quote.as_deref(), Some("neutron energy"));
    }

    #[test]
    fn replies_threaded_under_root() {
        let repo = fresh_repo();
        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "see fig 4".into());
        repo.create(&reply).unwrap();

        let thread = repo.list_replies(root.id.as_str()).unwrap();
        assert_eq!(thread.len(), 1);
        assert_eq!(thread[0].note, "see fig 4");
    }

    #[test]
    fn soft_delete_hides_from_listings_but_thread_preserved() {
        let repo = fresh_repo();
        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "yep".into());
        repo.create(&reply).unwrap();

        repo.soft_delete(root.id.as_str()).unwrap();

        // The root disappears from lookups and listings ...
        assert!(repo.get(root.id.as_str()).unwrap().is_none());
        assert!(
            repo.list_by_paper("p1")
                .unwrap()
                .iter()
                .all(|a| a.id != root.id)
        );
        // ... while its reply is still reachable through the thread.
        let thread = repo.list_replies(root.id.as_str()).unwrap();
        assert_eq!(thread.len(), 1);
    }

    #[test]
    fn update_note_persists() {
        let repo = fresh_repo();
        let root = sample_root();
        repo.create(&root).unwrap();

        let tags: Vec<String> = vec!["tag1".into(), "tag2".into()];
        repo.update_note(root.id.as_str(), "new note", Some("blue"), &tags)
            .unwrap();

        let fetched = repo.get(root.id.as_str()).unwrap().unwrap();
        assert_eq!(fetched.note, "new note");
        assert_eq!(fetched.color.as_deref(), Some("blue"));
        assert_eq!(fetched.tags, vec!["tag1".to_string(), "tag2".to_string()]);
    }

    #[test]
    fn resolver_ok_when_text_unchanged() {
        let mut anchor = ranged_anchor((1, 4), "bcd");
        assert_eq!(resolve_anchor(&mut anchor, "abcde"), AnchorStatus::Ok);
    }

    #[test]
    fn resolver_drifted_when_quote_moved() {
        let mut anchor = ranged_anchor((1, 4), "bcd");
        assert_eq!(resolve_anchor(&mut anchor, "xxabcde"), AnchorStatus::Drifted);
        assert_eq!(anchor.char_range, Some((3, 6)));
        assert_eq!(anchor.status, AnchorStatus::Drifted);
    }

    #[test]
    fn resolver_orphan_when_quote_missing() {
        let mut anchor = ranged_anchor((1, 4), "bcd");
        let status = resolve_anchor(&mut anchor, "nothing to see");
        assert_eq!(status, AnchorStatus::Orphan);
    }

    #[test]
    fn unread_includes_rows_never_seen() {
        let repo = fresh_repo();
        let note = sample_root();
        repo.create(&note).unwrap();

        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert_eq!(unread.len(), 1);
    }

    #[test]
    fn unread_excludes_rows_seen_after_update() {
        let repo = fresh_repo();
        let note = sample_root();
        repo.create(&note).unwrap();

        repo.mark_seen(&[note.id.as_str()], "lars").unwrap();

        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert!(unread.is_empty(), "should be no unread after mark_seen");
    }

    #[test]
    fn unread_reappears_after_annotation_is_updated() {
        let repo = fresh_repo();
        let note = sample_root();
        repo.create(&note).unwrap();
        repo.mark_seen(&[note.id.as_str()], "lars").unwrap();

        // Let the clock advance so the edit's `updated_at` is later than the
        // read receipt.
        std::thread::sleep(std::time::Duration::from_millis(1100));
        repo.update_note(note.id.as_str(), "edited note", None, &[])
            .unwrap();

        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert_eq!(unread.len(), 1, "edit should resurface the row as unread");
    }

    #[test]
    fn mark_thread_seen_covers_root_and_replies() {
        let repo = fresh_repo();
        let root = sample_root();
        repo.create(&root).unwrap();
        let reply = Annotation::new_reply(&root, "claude".into(), "follow-up".into());
        repo.create(&reply).unwrap();

        repo.mark_thread_seen(root.id.as_str(), "lars").unwrap();

        let unread = repo.list_unread("lars", Some("p1")).unwrap();
        assert!(unread.is_empty());
    }

    #[test]
    fn independent_readers_track_state_independently() {
        let repo = fresh_repo();
        let note = sample_root();
        repo.create(&note).unwrap();

        repo.mark_seen(&[note.id.as_str()], "lars").unwrap();

        assert!(repo.list_unread("lars", Some("p1")).unwrap().is_empty());
        assert_eq!(repo.list_unread("claude", Some("p1")).unwrap().len(), 1);
    }

    #[test]
    fn resolver_handles_multibyte_chars() {
        // U+2019 is three bytes in UTF-8 but a single char offset.
        let text = "D\u{2019}Ippolito wrote that...";
        let quote = "D\u{2019}Ippolito";
        let mut anchor = ranged_anchor((0, quote.chars().count()), quote);
        assert_eq!(resolve_anchor(&mut anchor, text), AnchorStatus::Ok);
    }

    #[test]
    fn resolver_uses_prefix_to_disambiguate_collision() {
        let text = "Initially the model failed. Then the model was trained on more data.";
        let mut anchor = Anchor {
            char_range: None,
            quote: Some("the model".into()),
            prefix: None,
            suffix: Some(" was trained".into()),
            ..Anchor::default()
        };

        assert_eq!(resolve_anchor(&mut anchor, text), AnchorStatus::Drifted);

        let (s, e) = anchor.char_range.unwrap();
        // The text is ASCII, so char offsets double as byte offsets here.
        assert_eq!(&text[s..e], "the model");
        assert!(s > 20, "expected the second occurrence at s>20, got s={s}");
    }

    #[test]
    fn resolver_falls_back_to_fuzzy_on_minor_edit() {
        let text = "We argued the network was very deep enough to overfit.";
        let mut anchor = Anchor {
            char_range: None,
            quote: Some("the network was deep".into()),
            ..Anchor::default()
        };

        let s = resolve_anchor_with_threshold(&mut anchor, text, 0.85);
        assert_eq!(
            s,
            AnchorStatus::Drifted,
            "fuzzy match should drift, got {s:?}"
        );
    }

    #[test]
    fn resolver_returns_orphan_when_offsets_oob_and_quote_absent() {
        let mut anchor = ranged_anchor((9000, 9100), "vanished");
        let status = resolve_anchor(&mut anchor, "the small text");
        assert_eq!(status, AnchorStatus::Orphan);
    }

    #[test]
    fn resolver_uses_sentence_id_when_quote_unfindable() {
        use scitadel_core::models::sentence_id;

        let original_sentence = "The Transformer Architecture relies on self-attention.";
        let new_text = "Intro. the transformer architecture relies on self-attention. Outro.";
        let mut anchor = Anchor {
            char_range: None,
            quote: Some("ZZZ-not-in-new-text-ZZZ".into()),
            sentence_id: Some(sentence_id(original_sentence)),
            ..Anchor::default()
        };

        let s = resolve_anchor(&mut anchor, new_text);
        assert_eq!(
            s,
            AnchorStatus::Drifted,
            "sentence-id rescue should mark Drifted, got {s:?}"
        );

        let (start, end) = anchor.char_range.unwrap();
        let resolved: String = new_text.chars().skip(start).take(end - start).collect();
        assert!(
            resolved.contains("transformer architecture"),
            "expected re-anchor to the matching sentence; got {resolved:?}"
        );
    }
}