1use chrono::{DateTime, Utc};
9use serde::{Deserialize, Serialize};
10
11use super::{AnnotationId, PaperId, QuestionId};
12
/// Status value stored in an [`Anchor`]'s `status` field.
///
/// Serde (de)serializes the variants in lowercase (`"ok"`, `"drifted"`,
/// `"orphan"`), the same spellings that `AnchorStatus::as_str` returns.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum AnchorStatus {
    /// The default status.
    #[default]
    Ok,
    /// NOTE(review): presumably the anchored text was found again, but not
    /// exactly where it was recorded — confirm against the re-anchoring code.
    Drifted,
    /// NOTE(review): presumably the anchored text could no longer be located
    /// — confirm against the re-anchoring code.
    Orphan,
}
25
26impl AnchorStatus {
27 #[must_use]
28 pub fn as_str(self) -> &'static str {
29 match self {
30 Self::Ok => "ok",
31 Self::Drifted => "drifted",
32 Self::Orphan => "orphan",
33 }
34 }
35
36 #[must_use]
37 pub fn parse(s: &str) -> Option<Self> {
38 match s {
39 "ok" => Some(Self::Ok),
40 "drifted" => Some(Self::Drifted),
41 "orphan" => Some(Self::Orphan),
42 _ => None,
43 }
44 }
45}
46
/// Locator stored on an annotation (see the `anchor` field of `Annotation`).
///
/// Every locator field is optional. `Anchor::default()` leaves all of them
/// unset and has a status of [`AnchorStatus::Ok`].
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Anchor {
    /// Pair of offsets.
    /// NOTE(review): presumably `(start, end)` into the source text; the unit
    /// (bytes vs. chars) and whether `end` is inclusive are not visible here
    /// — confirm.
    pub char_range: Option<(usize, usize)>,
    /// NOTE(review): presumably the exact text that was annotated — confirm.
    pub quote: Option<String>,
    /// NOTE(review): presumably context text just before `quote` — confirm.
    pub prefix: Option<String>,
    /// NOTE(review): presumably context text just after `quote` — confirm.
    pub suffix: Option<String>,
    /// Sentence identifier. The values built in this file are either a hash
    /// from `sentence_id()` or a marker id from `imported_sentence_id()` /
    /// `paper_note_sentence_id()`.
    pub sentence_id: Option<String>,
    /// NOTE(review): presumably identifies the revision of the source text the
    /// anchor was created against — confirm.
    pub source_version: Option<String>,
    /// Status of the anchor; defaults to [`AnchorStatus::Ok`].
    pub status: AnchorStatus,
}
66
67impl Anchor {
68 #[must_use]
70 pub fn is_orphan(&self) -> bool {
71 matches!(self.status, AnchorStatus::Orphan)
72 }
73
74 #[must_use]
81 pub fn is_imported_synthetic(&self) -> bool {
82 self.char_range.is_none()
83 && self.quote.is_none()
84 && self
85 .sentence_id
86 .as_deref()
87 .is_some_and(|s| s.starts_with(IMPORTED_SENTENCE_ID_PREFIX))
88 }
89
90 #[must_use]
98 pub fn is_paper_note(&self) -> bool {
99 self.char_range.is_none()
100 && self.quote.is_none()
101 && self
102 .sentence_id
103 .as_deref()
104 .is_some_and(|s| s.starts_with(PAPER_NOTE_SENTENCE_ID_PREFIX))
105 }
106}
107
/// Prefix of the marker sentence ids built by `imported_sentence_id`.
///
/// `Anchor::is_imported_synthetic` recognises such anchors by this prefix, so
/// changing the value would stop previously built ids from matching.
pub const IMPORTED_SENTENCE_ID_PREFIX: &str = "bibtex-import:";
113
/// Prefix of the marker sentence ids built by `paper_note_sentence_id`.
///
/// `Anchor::is_paper_note` recognises such anchors by this prefix, so
/// changing the value would stop previously built ids from matching.
pub const PAPER_NOTE_SENTENCE_ID_PREFIX: &str = "paper-note:";
122
123#[must_use]
127pub fn paper_note_sentence_id(paper_id: &str) -> String {
128 format!("{PAPER_NOTE_SENTENCE_ID_PREFIX}{paper_id}")
129}
130
131#[must_use]
139pub fn paper_note_anchor(paper_id: &str) -> Anchor {
140 Anchor {
141 sentence_id: Some(paper_note_sentence_id(paper_id)),
142 status: AnchorStatus::Ok,
143 ..Anchor::default()
144 }
145}
146
147#[must_use]
154pub fn imported_sentence_id(citekey: &str, note: &str) -> String {
155 let content_hash = sentence_id(note);
156 format!("{IMPORTED_SENTENCE_ID_PREFIX}{citekey}:{content_hash}")
157}
158
/// A note attached to a paper: either a root annotation or a reply to
/// another annotation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Annotation {
    /// Identifier of this annotation; the constructors obtain it from
    /// `AnnotationId::new()`.
    pub id: AnnotationId,
    /// Id of the annotation this one replies to; `None` for a root annotation.
    pub parent_id: Option<AnnotationId>,
    /// Paper the annotation belongs to.
    pub paper_id: PaperId,
    /// Optional associated question. Replies copy it from their parent.
    pub question_id: Option<QuestionId>,
    /// Locator for the annotation. `Annotation::new_reply` stores
    /// `Anchor::default()` here.
    pub anchor: Anchor,
    /// Text of the annotation.
    pub note: String,
    /// NOTE(review): presumably a highlight colour; the expected format is
    /// not visible here — confirm.
    pub color: Option<String>,
    /// Tags; the constructors start with an empty list.
    pub tags: Vec<String>,
    /// Author of the annotation.
    /// NOTE(review): format (user name, id, …) is not visible here — confirm.
    pub author: String,
    /// Creation time in UTC; the constructors set it to the current time.
    pub created_at: DateTime<Utc>,
    /// Last-update time in UTC; equal to `created_at` right after construction.
    pub updated_at: DateTime<Utc>,
    /// `Some` marks the annotation as deleted (see `Annotation::is_deleted`);
    /// the constructors set it to `None`.
    pub deleted_at: Option<DateTime<Utc>>,
}
179
180impl Annotation {
181 #[must_use]
183 pub fn new_root(paper_id: PaperId, author: String, note: String, anchor: Anchor) -> Self {
184 let now = Utc::now();
185 Self {
186 id: AnnotationId::new(),
187 parent_id: None,
188 paper_id,
189 question_id: None,
190 anchor,
191 note,
192 color: None,
193 tags: Vec::new(),
194 author,
195 created_at: now,
196 updated_at: now,
197 deleted_at: None,
198 }
199 }
200
201 #[must_use]
203 pub fn new_reply(parent: &Annotation, author: String, note: String) -> Self {
204 let now = Utc::now();
205 Self {
206 id: AnnotationId::new(),
207 parent_id: Some(parent.id.clone()),
208 paper_id: parent.paper_id.clone(),
209 question_id: parent.question_id.clone(),
210 anchor: Anchor::default(),
211 note,
212 color: None,
213 tags: Vec::new(),
214 author,
215 created_at: now,
216 updated_at: now,
217 deleted_at: None,
218 }
219 }
220
221 #[must_use]
223 pub fn is_reply(&self) -> bool {
224 self.parent_id.is_some()
225 }
226
227 #[must_use]
229 pub fn is_deleted(&self) -> bool {
230 self.deleted_at.is_some()
231 }
232}
233
/// Pairs an annotation with a reader and a timestamp.
///
/// NOTE(review): presumably a read receipt ("`reader` has seen
/// `annotation_id`"); where it is written and whether there is one record per
/// reader is not visible here — confirm.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct AnnotationRead {
    /// Annotation the record refers to.
    pub annotation_id: AnnotationId,
    /// Reader the record refers to.
    /// NOTE(review): presumably the same kind of identifier as
    /// `Annotation::author` — confirm.
    pub reader: String,
    /// Timestamp in UTC.
    pub seen_at: DateTime<Utc>,
}
241
242#[must_use]
249pub fn normalize_sentence(s: &str) -> String {
250 use unicode_normalization::UnicodeNormalization;
251 let composed: String = s.nfkc().collect();
253 let lowered: String = composed.chars().flat_map(char::to_lowercase).collect();
254 let mut out = String::with_capacity(lowered.len());
255 let mut prev_was_space = true; for ch in lowered.chars() {
257 if ch.is_whitespace() {
258 if !prev_was_space {
259 out.push(' ');
260 prev_was_space = true;
261 }
262 } else {
263 out.push(ch);
264 prev_was_space = false;
265 }
266 }
267 if out.ends_with(' ') {
268 out.pop();
269 }
270 out
271}
272
273#[must_use]
278pub fn sentence_id(s: &str) -> String {
279 use sha1::{Digest, Sha1};
280 let normalized = normalize_sentence(s);
281 let mut hasher = Sha1::new();
282 hasher.update(normalized.as_bytes());
283 let digest = hasher.finalize();
284 let mut hex = String::with_capacity(40);
285 for byte in digest {
286 use std::fmt::Write as _;
287 let _ = write!(hex, "{byte:02x}");
288 }
289 hex
290}
291
#[cfg(test)]
mod tests {
    //! Unit tests for annotation construction, anchor predicates, marker
    //! sentence ids and sentence normalisation/hashing.

    use super::*;

    #[test]
    fn reply_inherits_paper_and_question() {
        let paper_id: PaperId = "p1".into();
        let root = Annotation::new_root(
            paper_id.clone(),
            "lars".into(),
            "interesting passage".into(),
            Anchor {
                quote: Some("neutron energy".into()),
                ..Anchor::default()
            },
        );
        let reply = Annotation::new_reply(&root, "claude".into(), "agreed; see 4.2".into());

        // Inherited from the parent.
        assert_eq!(reply.paper_id, paper_id);
        assert_eq!(reply.question_id, root.question_id);
        assert_eq!(reply.parent_id.as_ref(), Some(&root.id));

        // Reply/root classification and initial deletion state.
        assert!(reply.is_reply());
        assert!(!root.is_reply());
        assert!(!root.is_deleted());
        assert!(!reply.is_deleted());

        // A reply stores an empty anchor instead of copying the parent's.
        assert!(
            reply.anchor.quote.is_none(),
            "replies must not copy the parent's anchor"
        );
        assert_eq!(reply.anchor, Anchor::default());
    }

    #[test]
    fn anchor_status_round_trip() {
        for s in [
            AnchorStatus::Ok,
            AnchorStatus::Drifted,
            AnchorStatus::Orphan,
        ] {
            assert_eq!(AnchorStatus::parse(s.as_str()), Some(s));
        }
    }

    #[test]
    fn anchor_status_parse_rejects_unknown_input() {
        // Parsing is exact: wrong case, padding and unknown names all fail.
        assert_eq!(AnchorStatus::parse("OK"), None);
        assert_eq!(AnchorStatus::parse(" ok"), None);
        assert_eq!(AnchorStatus::parse(""), None);
        assert_eq!(AnchorStatus::parse("missing"), None);
    }

    #[test]
    fn orphan_flag() {
        let mut a = Anchor::default();
        assert!(!a.is_orphan());
        a.status = AnchorStatus::Orphan;
        assert!(a.is_orphan());
    }

    #[test]
    fn imported_synthetic_id_has_marker_prefix() {
        let id = imported_sentence_id("smith2024", "some note");
        assert!(id.starts_with(IMPORTED_SENTENCE_ID_PREFIX));
        assert!(id.contains("smith2024"));
    }

    #[test]
    fn is_imported_synthetic_recognises_marker_anchor() {
        let a = Anchor {
            sentence_id: Some(imported_sentence_id("k", "n")),
            ..Anchor::default()
        };
        assert!(a.is_imported_synthetic());
    }

    #[test]
    fn is_imported_synthetic_rejects_real_sentence_id() {
        let a = Anchor {
            sentence_id: Some(sentence_id("a real sentence.")),
            ..Anchor::default()
        };
        assert!(!a.is_imported_synthetic());
    }

    #[test]
    fn paper_note_sentinel_is_disjoint_from_imported() {
        assert_ne!(IMPORTED_SENTENCE_ID_PREFIX, PAPER_NOTE_SENTENCE_ID_PREFIX);
        let imp = imported_sentence_id("k", "n");
        let pn = paper_note_sentence_id("p-attn");
        assert!(imp.starts_with(IMPORTED_SENTENCE_ID_PREFIX));
        assert!(pn.starts_with(PAPER_NOTE_SENTENCE_ID_PREFIX));
        assert!(!imp.starts_with(PAPER_NOTE_SENTENCE_ID_PREFIX));
        assert!(!pn.starts_with(IMPORTED_SENTENCE_ID_PREFIX));
    }

    #[test]
    fn paper_note_anchor_is_recognised_by_predicate() {
        let a = paper_note_anchor("p-attn");
        assert!(a.is_paper_note());
        assert_eq!(a.status, AnchorStatus::Ok);
        assert!(a.quote.is_none());
        assert!(a.char_range.is_none());
        assert_eq!(
            a.sentence_id.as_deref(),
            Some(&*paper_note_sentence_id("p-attn"))
        );
    }

    #[test]
    fn paper_note_id_is_stable_per_paper() {
        assert_eq!(paper_note_sentence_id("p-1"), paper_note_sentence_id("p-1"));
        assert_ne!(paper_note_sentence_id("p-1"), paper_note_sentence_id("p-2"));
    }

    #[test]
    fn is_paper_note_recognises_marker_anchor() {
        let a = Anchor {
            sentence_id: Some(paper_note_sentence_id("p-1")),
            ..Anchor::default()
        };
        assert!(a.is_paper_note());
        assert!(!a.is_imported_synthetic());
    }

    #[test]
    fn is_paper_note_rejects_quote_or_range() {
        let with_quote = Anchor {
            sentence_id: Some(paper_note_sentence_id("p-1")),
            quote: Some("hi".into()),
            ..Anchor::default()
        };
        assert!(!with_quote.is_paper_note());

        let with_range = Anchor {
            sentence_id: Some(paper_note_sentence_id("p-1")),
            char_range: Some((0, 2)),
            ..Anchor::default()
        };
        assert!(!with_range.is_paper_note());
    }

    #[test]
    fn is_imported_synthetic_rejects_anchor_with_quote_or_range() {
        let with_quote = Anchor {
            sentence_id: Some(imported_sentence_id("k", "n")),
            quote: Some("hi".into()),
            ..Anchor::default()
        };
        assert!(!with_quote.is_imported_synthetic());

        let with_range = Anchor {
            sentence_id: Some(imported_sentence_id("k", "n")),
            char_range: Some((0, 2)),
            ..Anchor::default()
        };
        assert!(!with_range.is_imported_synthetic());
    }

    #[test]
    fn normalize_collapses_whitespace_and_lowercases() {
        assert_eq!(normalize_sentence(" Hello WORLD\n"), "hello world");
    }

    #[test]
    fn normalize_handles_empty_and_whitespace_only_input() {
        assert_eq!(normalize_sentence(""), "");
        assert_eq!(normalize_sentence(" \t\n"), "");
    }

    #[test]
    fn normalize_folds_ligatures_via_nfkc() {
        assert_eq!(normalize_sentence("ef\u{FB01}cient"), "efficient");
    }

    #[test]
    fn sentence_id_is_stable_under_whitespace_and_case() {
        let a = sentence_id("Hello World");
        let b = sentence_id("hello world");
        let c = sentence_id("HELLO\tWORLD");
        assert_eq!(a, b);
        assert_eq!(b, c);
        assert_eq!(a.len(), 40);
    }

    #[test]
    fn sentence_id_changes_when_content_does() {
        assert_ne!(sentence_id("hello world"), sentence_id("hello mars"));
    }
}