use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use super::{AnnotationId, PaperId, QuestionId};
/// Resolution state recorded on an [`Anchor`].
///
/// Serde serializes each variant as its lowercase name (`"ok"`, `"drifted"`,
/// `"orphan"`), the same strings produced by [`AnchorStatus::as_str`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum AnchorStatus {
/// Default status.
/// NOTE(review): presumably "anchor still resolves as recorded" — confirm.
#[default]
Ok,
/// NOTE(review): presumably "anchor was re-located but has moved" — confirm.
Drifted,
/// Status tested by [`Anchor::is_orphan`].
/// NOTE(review): presumably "anchor can no longer be located" — confirm.
Orphan,
}
impl AnchorStatus {
    /// Returns the lowercase name of this status (`"ok"`, `"drifted"` or
    /// `"orphan"`).
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Ok => "ok",
            Self::Drifted => "drifted",
            Self::Orphan => "orphan",
        }
    }

    /// Inverse of [`AnchorStatus::as_str`].
    ///
    /// The comparison is exact and case-sensitive; any string that is not one
    /// of the three status names yields `None`.
    #[must_use]
    pub fn parse(s: &str) -> Option<Self> {
        // Drive the lookup from `as_str` so the two name tables cannot drift.
        [Self::Ok, Self::Drifted, Self::Orphan]
            .iter()
            .copied()
            .find(|status| status.as_str() == s)
    }
}
/// Anchor carried by every [`Annotation`].
///
/// All locator fields are optional; `Anchor::default()` leaves each of them
/// `None` and sets `status` to [`AnchorStatus::Ok`].
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Anchor {
/// Pair of offsets for the anchored span.
/// NOTE(review): presumably `(start, end)`; units (bytes vs. chars) and
/// whether `end` is inclusive are not visible in this file — confirm.
pub char_range: Option<(usize, usize)>,
/// NOTE(review): presumably the exact anchored text — confirm.
pub quote: Option<String>,
/// NOTE(review): presumably context text just before `quote` — confirm.
pub prefix: Option<String>,
/// NOTE(review): presumably context text just after `quote` — confirm.
pub suffix: Option<String>,
/// Sentence identifier. Within this file it is either a hash produced by
/// [`sentence_id()`] or a synthetic marker beginning with
/// [`IMPORTED_SENTENCE_ID_PREFIX`] or [`PAPER_NOTE_SENTENCE_ID_PREFIX`].
pub sentence_id: Option<String>,
/// NOTE(review): presumably identifies the version of the source text the
/// anchor was created against — confirm.
pub source_version: Option<String>,
/// Resolution state of the anchor; defaults to [`AnchorStatus::Ok`].
pub status: AnchorStatus,
}
impl Anchor {
    /// Returns `true` when the anchor's status is [`AnchorStatus::Orphan`].
    #[must_use]
    pub fn is_orphan(&self) -> bool {
        self.status == AnchorStatus::Orphan
    }

    /// Returns `true` for a synthetic "imported" anchor: no `char_range`, no
    /// `quote`, and a `sentence_id` that starts with
    /// [`IMPORTED_SENTENCE_ID_PREFIX`].
    #[must_use]
    pub fn is_imported_synthetic(&self) -> bool {
        match (&self.char_range, &self.quote, self.sentence_id.as_deref()) {
            (None, None, Some(id)) => id.starts_with(IMPORTED_SENTENCE_ID_PREFIX),
            _ => false,
        }
    }

    /// Returns `true` for a paper-level note anchor: no `char_range`, no
    /// `quote`, and a `sentence_id` that starts with
    /// [`PAPER_NOTE_SENTENCE_ID_PREFIX`].
    #[must_use]
    pub fn is_paper_note(&self) -> bool {
        match (&self.char_range, &self.quote, self.sentence_id.as_deref()) {
            (None, None, Some(id)) => id.starts_with(PAPER_NOTE_SENTENCE_ID_PREFIX),
            _ => false,
        }
    }
}
/// Prefix of the synthetic sentence ids built by [`imported_sentence_id`] and
/// recognised by [`Anchor::is_imported_synthetic`].
pub const IMPORTED_SENTENCE_ID_PREFIX: &str = "bibtex-import:";
/// Prefix of the synthetic sentence ids built by [`paper_note_sentence_id`] and
/// recognised by [`Anchor::is_paper_note`].
pub const PAPER_NOTE_SENTENCE_ID_PREFIX: &str = "paper-note:";
/// Builds the synthetic sentence id for a paper-level note:
/// [`PAPER_NOTE_SENTENCE_ID_PREFIX`] immediately followed by `paper_id`.
///
/// `paper_id` is appended verbatim, so equal ids always give equal results.
#[must_use]
pub fn paper_note_sentence_id(paper_id: &str) -> String {
    [PAPER_NOTE_SENTENCE_ID_PREFIX, paper_id].concat()
}
/// Builds the anchor used for a paper-level note.
///
/// Only `sentence_id` is populated (with [`paper_note_sentence_id`]); every
/// other locator field keeps its default of `None` and the status is
/// [`AnchorStatus::Ok`], so the result satisfies [`Anchor::is_paper_note`].
#[must_use]
pub fn paper_note_anchor(paper_id: &str) -> Anchor {
    let marker = paper_note_sentence_id(paper_id);
    Anchor {
        status: AnchorStatus::Ok,
        sentence_id: Some(marker),
        ..Default::default()
    }
}
/// Builds the synthetic sentence id for an imported note.
///
/// The result has the shape `<prefix><citekey>:<hash>`, where `<prefix>` is
/// [`IMPORTED_SENTENCE_ID_PREFIX`] and `<hash>` is [`sentence_id`] applied to
/// `note`.
#[must_use]
pub fn imported_sentence_id(citekey: &str, note: &str) -> String {
    let note_hash = sentence_id(note);
    [
        IMPORTED_SENTENCE_ID_PREFIX,
        citekey,
        ":",
        note_hash.as_str(),
    ]
    .concat()
}
/// A note attached to a paper, either a root annotation or a reply to one.
///
/// Build instances with [`Annotation::new_root`] or [`Annotation::new_reply`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Annotation {
/// Identifier of this annotation; the constructors generate a fresh one.
pub id: AnnotationId,
/// Id of the annotation this one replies to; `None` for a root annotation
/// (see [`Annotation::is_reply`]).
pub parent_id: Option<AnnotationId>,
/// Paper the annotation belongs to; replies copy it from their parent.
pub paper_id: PaperId,
/// Optional associated question; replies copy it from their parent.
pub question_id: Option<QuestionId>,
/// Anchor of the annotation. Replies are created with `Anchor::default()`.
pub anchor: Anchor,
/// Body text of the annotation.
pub note: String,
/// Optional colour. The constructors leave it `None`.
/// NOTE(review): expected format (name, hex, ...) is not visible here.
pub color: Option<String>,
/// Tags; the constructors start with an empty list.
pub tags: Vec<String>,
/// Author of the annotation, as passed to the constructor.
pub author: String,
/// Creation time, set by the constructors from `Utc::now()`.
pub created_at: DateTime<Utc>,
/// Last-update time; the constructors set it equal to `created_at`.
pub updated_at: DateTime<Utc>,
/// `Some` once the annotation is deleted (see [`Annotation::is_deleted`]);
/// the constructors leave it `None`.
pub deleted_at: Option<DateTime<Utc>>,
}
impl Annotation {
    /// Creates a top-level annotation on `paper_id`.
    ///
    /// The annotation gets a fresh id, no parent, no question, no colour and
    /// no tags. `created_at` and `updated_at` are both set to the same
    /// `Utc::now()` reading, and `deleted_at` is `None`.
    #[must_use]
    pub fn new_root(paper_id: PaperId, author: String, note: String, anchor: Anchor) -> Self {
        let created_at = Utc::now();
        Self {
            id: AnnotationId::new(),
            parent_id: None,
            paper_id,
            question_id: None,
            anchor,
            note,
            color: None,
            tags: Vec::new(),
            author,
            created_at,
            updated_at: created_at,
            deleted_at: None,
        }
    }

    /// Creates a reply to `parent`.
    ///
    /// The reply copies the parent's `paper_id` and `question_id` and records
    /// the parent's id in `parent_id`. It does not copy the parent's anchor:
    /// the reply's anchor is `Anchor::default()`. All remaining fields are
    /// initialised exactly as in [`Annotation::new_root`].
    #[must_use]
    pub fn new_reply(parent: &Annotation, author: String, note: String) -> Self {
        Self {
            parent_id: Some(parent.id.clone()),
            question_id: parent.question_id.clone(),
            // Everything else is identical to a fresh root on the same paper.
            ..Self::new_root(parent.paper_id.clone(), author, note, Anchor::default())
        }
    }

    /// Returns `true` when this annotation has a parent.
    #[must_use]
    pub fn is_reply(&self) -> bool {
        self.parent_id.is_some()
    }

    /// Returns `true` when `deleted_at` is set.
    #[must_use]
    pub fn is_deleted(&self) -> bool {
        self.deleted_at.is_some()
    }
}
/// Pairs an annotation with a reader and a timestamp.
/// NOTE(review): presumably a read receipt ("`reader` saw this annotation at
/// `seen_at`") — nothing in this file constructs or consumes it; confirm.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct AnnotationRead {
/// Annotation the record refers to.
pub annotation_id: AnnotationId,
/// NOTE(review): presumably the same kind of identifier as
/// `Annotation::author` — confirm.
pub reader: String,
/// Timestamp of the record, in UTC.
pub seen_at: DateTime<Utc>,
}
/// Canonicalises a sentence so that superficially different spellings compare
/// equal.
///
/// Steps, in order:
/// 1. Unicode NFKC normalisation (e.g. the `ﬁ` ligature becomes `fi`).
/// 2. Per-character lowercasing via `char::to_lowercase`.
/// 3. Every run of Unicode whitespace is collapsed to a single ASCII space,
///    and leading/trailing whitespace is dropped.
///
/// An empty or all-whitespace input yields an empty string.
#[must_use]
pub fn normalize_sentence(s: &str) -> String {
    use unicode_normalization::UnicodeNormalization;

    let folded: String = s.nfkc().flat_map(char::to_lowercase).collect();

    // `split_whitespace` uses the same Unicode White_Space definition as
    // `char::is_whitespace` and never yields empty pieces, so joining with a
    // single space both collapses runs and trims the ends.
    folded.split_whitespace().collect::<Vec<_>>().join(" ")
}
/// Returns a content hash for a sentence: the SHA-1 digest of
/// [`normalize_sentence`]`(s)`, rendered as 40 lowercase hex characters.
///
/// Because the input is normalised first, inputs that differ only in case or
/// whitespace layout produce the same id.
#[must_use]
pub fn sentence_id(s: &str) -> String {
    use sha1::{Digest, Sha1};
    use std::fmt::Write as _;

    let digest = Sha1::digest(normalize_sentence(s).as_bytes());

    // 20 digest bytes -> 40 hex characters.
    digest
        .into_iter()
        .fold(String::with_capacity(40), |mut hex, byte| {
            // Writing into a `String` cannot fail, so the result is ignored.
            let _ = write!(hex, "{byte:02x}");
            hex
        })
}
#[cfg(test)]
mod tests {
    //! Unit tests for anchors, annotations and sentence-id helpers.

    use super::*;

    #[test]
    fn reply_inherits_paper_and_question() {
        let paper_id: PaperId = "p1".into();
        let root = Annotation::new_root(
            paper_id.clone(),
            "lars".into(),
            "interesting passage".into(),
            Anchor {
                quote: Some("neutron energy".into()),
                ..Anchor::default()
            },
        );
        let reply = Annotation::new_reply(&root, "claude".into(), "agreed; see 4.2".into());

        assert_eq!(reply.paper_id, paper_id);
        assert_eq!(reply.question_id, root.question_id);
        assert_eq!(reply.parent_id.as_ref(), Some(&root.id));
        assert!(reply.is_reply());
        assert!(!root.is_reply());
        // A reply starts from a default anchor rather than copying the parent's.
        assert!(
            reply.anchor.quote.is_none(),
            "replies must not inherit the parent's anchor"
        );
    }

    #[test]
    fn anchor_status_round_trip() {
        for s in [
            AnchorStatus::Ok,
            AnchorStatus::Drifted,
            AnchorStatus::Orphan,
        ] {
            assert_eq!(AnchorStatus::parse(s.as_str()), Some(s));
        }
    }

    #[test]
    fn orphan_flag() {
        let mut a = Anchor::default();
        assert!(!a.is_orphan());
        a.status = AnchorStatus::Orphan;
        assert!(a.is_orphan());
    }

    #[test]
    fn imported_synthetic_id_has_marker_prefix() {
        let id = imported_sentence_id("smith2024", "some note");
        assert!(id.starts_with(IMPORTED_SENTENCE_ID_PREFIX));
        assert!(id.contains("smith2024"));
    }

    #[test]
    fn is_imported_synthetic_recognises_marker_anchor() {
        let a = Anchor {
            sentence_id: Some(imported_sentence_id("k", "n")),
            ..Anchor::default()
        };
        assert!(a.is_imported_synthetic());
    }

    #[test]
    fn is_imported_synthetic_rejects_real_sentence_id() {
        let a = Anchor {
            sentence_id: Some(sentence_id("a real sentence.")),
            ..Anchor::default()
        };
        assert!(!a.is_imported_synthetic());
    }

    #[test]
    fn paper_note_sentinel_is_disjoint_from_imported() {
        assert_ne!(IMPORTED_SENTENCE_ID_PREFIX, PAPER_NOTE_SENTENCE_ID_PREFIX);
        let imp = imported_sentence_id("k", "n");
        let pn = paper_note_sentence_id("p-attn");
        assert!(imp.starts_with(IMPORTED_SENTENCE_ID_PREFIX));
        assert!(pn.starts_with(PAPER_NOTE_SENTENCE_ID_PREFIX));
        assert!(!imp.starts_with(PAPER_NOTE_SENTENCE_ID_PREFIX));
        assert!(!pn.starts_with(IMPORTED_SENTENCE_ID_PREFIX));
    }

    #[test]
    fn paper_note_anchor_is_recognised_by_predicate() {
        let a = paper_note_anchor("p-attn");
        assert!(a.is_paper_note());
        assert_eq!(a.status, AnchorStatus::Ok);
        assert!(a.quote.is_none());
        assert!(a.char_range.is_none());
        assert_eq!(
            a.sentence_id.as_deref(),
            Some(&*paper_note_sentence_id("p-attn"))
        );
    }

    #[test]
    fn paper_note_id_is_stable_per_paper() {
        assert_eq!(paper_note_sentence_id("p-1"), paper_note_sentence_id("p-1"));
        assert_ne!(paper_note_sentence_id("p-1"), paper_note_sentence_id("p-2"));
    }

    #[test]
    fn is_paper_note_recognises_marker_anchor() {
        let a = Anchor {
            sentence_id: Some(paper_note_sentence_id("p-1")),
            ..Anchor::default()
        };
        assert!(a.is_paper_note());
        assert!(!a.is_imported_synthetic());
    }

    #[test]
    fn is_paper_note_rejects_quote_or_range() {
        let with_quote = Anchor {
            sentence_id: Some(paper_note_sentence_id("p-1")),
            quote: Some("hi".into()),
            ..Anchor::default()
        };
        assert!(!with_quote.is_paper_note());

        let with_range = Anchor {
            sentence_id: Some(paper_note_sentence_id("p-1")),
            char_range: Some((0, 2)),
            ..Anchor::default()
        };
        assert!(!with_range.is_paper_note());
    }

    #[test]
    fn is_imported_synthetic_rejects_anchor_with_quote_or_range() {
        let with_quote = Anchor {
            sentence_id: Some(imported_sentence_id("k", "n")),
            quote: Some("hi".into()),
            ..Anchor::default()
        };
        assert!(!with_quote.is_imported_synthetic());

        let with_range = Anchor {
            sentence_id: Some(imported_sentence_id("k", "n")),
            char_range: Some((0, 2)),
            ..Anchor::default()
        };
        assert!(!with_range.is_imported_synthetic());
    }

    #[test]
    fn normalize_collapses_whitespace_and_lowercases() {
        assert_eq!(normalize_sentence(" Hello WORLD\n"), "hello world");
    }

    #[test]
    fn normalize_folds_ligatures_via_nfkc() {
        assert_eq!(normalize_sentence("ef\u{FB01}cient"), "efficient");
    }

    #[test]
    fn sentence_id_is_stable_under_whitespace_and_case() {
        let a = sentence_id("Hello World");
        let b = sentence_id("hello world");
        let c = sentence_id("HELLO\tWORLD");
        assert_eq!(a, b);
        assert_eq!(b, c);
        assert_eq!(a.len(), 40);
    }

    #[test]
    fn sentence_id_changes_when_content_does() {
        assert_ne!(sentence_id("hello world"), sentence_id("hello mars"));
    }
}