Skip to main content

verso/reader/
anchor.rs

1use serde::{Deserialize, Serialize};
2use sha2::{Digest, Sha256};
3
/// A serialisable reading position.
///
/// Each field is (de)serialised under a one-letter key (`s`, `o`, `h`) via
/// `#[serde(rename = ...)]`, so renaming or reordering fields here affects the
/// stored representation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Location {
    /// Serialised as `"s"`.
    /// NOTE(review): presumably the index of the spine item (chapter/document)
    /// the position lives in — confirm against callers.
    #[serde(rename = "s")]
    pub spine_idx: u32,
    /// Serialised as `"o"`.
    /// NOTE(review): presumably a char (not byte) offset into that item's
    /// plaintext, matching the `char_offset` taken by `anchor_hash` — confirm.
    #[serde(rename = "o")]
    pub char_offset: u64,
    /// Serialised as `"h"`.
    /// NOTE(review): presumably the value produced by `anchor_hash` for this
    /// position — confirm against callers.
    #[serde(rename = "h")]
    pub anchor_hash: String,
}
13
14/// 16-hex-char SHA-256 of the 50-char window centred on `char_offset` (by char, not byte).
15///
16/// The offset is quantised to 16-char buckets so that small drifts (< 16 chars) hash
17/// to the same window — making the anchor hash stable across trivial re-imports.
18pub fn anchor_hash(text: &str, char_offset: usize) -> String {
19    let chars: Vec<char> = text.chars().collect();
20    let half = 25usize;
21    // Quantise to 16-char buckets so tiny shifts produce the same window.
22    let bucket = (char_offset / 16) * 16;
23    let start = bucket.saturating_sub(half);
24    let end = (bucket + half).min(chars.len());
25    let window: String = chars[start..end].iter().collect();
26    let digest = Sha256::digest(window.as_bytes());
27    hex::encode(&digest[..8])
28}
29
30/// Try to relocate the passage `text` in `new_plaintext` given the original offset
31/// and the stored context windows. Returns the new char-offset where `text` starts.
32pub fn reanchor(
33    new_plaintext: &str,
34    text: &str,
35    original_offset: usize,
36    ctx_before: &str,
37    ctx_after: &str,
38) -> Option<usize> {
39    // Strategy: search for ctx_before + text + ctx_after as a flexible window.
40    let needle = format!("{ctx_before}{text}{ctx_after}");
41    if let Some(i) = new_plaintext.find(&needle) {
42        // Map byte index → char index, then offset by ctx_before length in chars.
43        let char_i = new_plaintext[..i].chars().count();
44        return Some(char_i + ctx_before.chars().count());
45    }
46    // Fallback: search for `text` alone; accept only if unique.
47    let matches: Vec<usize> = new_plaintext.match_indices(text).map(|(i, _)| i).collect();
48    if matches.len() == 1 {
49        return Some(new_plaintext[..matches[0]].chars().count());
50    }
51    // Ambiguous: pick the match closest to the original offset.
52    if !matches.is_empty() {
53        let best = matches
54            .into_iter()
55            .map(|b| new_plaintext[..b].chars().count())
56            .min_by_key(|c| (c.wrapping_sub(original_offset) as i64).abs())?;
57        return Some(best);
58    }
59    None
60}