Skip to main content

cdx_core/
anchor.rs

//! Unified anchor system for content references.
//!
//! Anchors provide a way to reference specific locations within document content,
//! supporting block-level, point, and range anchors.
//!
//! # Anchor Types
//!
//! - **Block-level**: References an entire block (`#blockId`)
//! - **Point**: References a specific character offset (`#blockId/15`)
//! - **Range**: References a character range (`#blockId/10-25`)
//!
//! # Example
//!
//! ```rust
//! use cdx_core::anchor::{ContentAnchor, ContentAnchorUri};
//!
//! // Block-level anchor
//! let anchor = ContentAnchor::block("para-1");
//!
//! // Point anchor at offset 15
//! let point_anchor = ContentAnchor::point("para-1", 15);
//!
//! // Range anchor from offset 10 to 25
//! let range_anchor = ContentAnchor::range("para-1", 10, 25);
//!
//! // Parse from URI format
//! let uri: ContentAnchorUri = "#para-1/10-25".parse().unwrap();
//! let anchor = ContentAnchor::from(uri);
//! ```
30
31use std::fmt;
32use std::str::FromStr;
33
34use serde::{Deserialize, Serialize};
35
36use crate::DocumentId;
37
38/// A content anchor referencing a location within the document.
39///
40/// Anchors can reference:
41/// - An entire block (block-level anchor)
42/// - A specific character position within a block (point anchor)
43/// - A character range within a block (range anchor)
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45#[serde(rename_all = "camelCase")]
46pub struct ContentAnchor {
47    /// The ID of the referenced block.
48    pub block_id: String,
49
50    /// Character offset for point anchors.
51    /// When present without `end`, this is a point anchor.
52    #[serde(default, skip_serializing_if = "Option::is_none")]
53    pub offset: Option<usize>,
54
55    /// Start of range for range anchors.
56    #[serde(default, skip_serializing_if = "Option::is_none")]
57    pub start: Option<usize>,
58
59    /// End of range for range anchors (exclusive).
60    #[serde(default, skip_serializing_if = "Option::is_none")]
61    pub end: Option<usize>,
62
63    /// Optional content hash for stale detection.
64    /// When set, can be used to detect if the referenced content has changed.
65    #[serde(default, skip_serializing_if = "Option::is_none")]
66    pub content_hash: Option<DocumentId>,
67}
68
69impl ContentAnchor {
70    /// Create a block-level anchor referencing an entire block.
71    #[must_use]
72    pub fn block(block_id: impl Into<String>) -> Self {
73        Self {
74            block_id: block_id.into(),
75            offset: None,
76            start: None,
77            end: None,
78            content_hash: None,
79        }
80    }
81
82    /// Create a point anchor at a specific character offset.
83    #[must_use]
84    pub fn point(block_id: impl Into<String>, offset: usize) -> Self {
85        Self {
86            block_id: block_id.into(),
87            offset: Some(offset),
88            start: None,
89            end: None,
90            content_hash: None,
91        }
92    }
93
94    /// Create a range anchor spanning a character range.
95    #[must_use]
96    pub fn range(block_id: impl Into<String>, start: usize, end: usize) -> Self {
97        Self {
98            block_id: block_id.into(),
99            offset: None,
100            start: Some(start),
101            end: Some(end),
102            content_hash: None,
103        }
104    }
105
106    /// Set the content hash for stale detection.
107    #[must_use]
108    pub fn with_content_hash(mut self, hash: DocumentId) -> Self {
109        self.content_hash = Some(hash);
110        self
111    }
112
113    /// Check if this is a block-level anchor (no point or range).
114    #[must_use]
115    pub fn is_block_anchor(&self) -> bool {
116        self.offset.is_none() && self.start.is_none() && self.end.is_none()
117    }
118
119    /// Check if this is a point anchor.
120    #[must_use]
121    pub fn is_point_anchor(&self) -> bool {
122        self.offset.is_some()
123    }
124
125    /// Check if this is a range anchor.
126    #[must_use]
127    pub fn is_range_anchor(&self) -> bool {
128        self.start.is_some() && self.end.is_some()
129    }
130
131    /// Convert to URI format.
132    #[must_use]
133    pub fn to_uri(&self) -> ContentAnchorUri {
134        ContentAnchorUri::from(self.clone())
135    }
136}
137
138impl From<ContentAnchorUri> for ContentAnchor {
139    fn from(uri: ContentAnchorUri) -> Self {
140        Self {
141            block_id: uri.block_id,
142            offset: uri.offset,
143            start: uri.start,
144            end: uri.end,
145            content_hash: None,
146        }
147    }
148}
149
/// URI representation of a content anchor.
///
/// Format: `#blockId` | `#blockId/offset` | `#blockId/start-end`
///
/// Unlike [`ContentAnchor`], this type has no content hash; it holds only the
/// block ID and the optional position.
///
/// # Examples
///
/// - `#para-1` - Block-level anchor
/// - `#para-1/15` - Point anchor at offset 15
/// - `#para-1/10-25` - Range anchor from 10 to 25
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContentAnchorUri {
    /// The ID of the referenced block (the text after the leading `#`).
    pub block_id: String,

    /// Character offset for point anchors.
    pub offset: Option<usize>,

    /// Start of the range for range anchors.
    pub start: Option<usize>,

    /// End of the range for range anchors.
    pub end: Option<usize>,
}
173
174impl ContentAnchorUri {
175    /// Create a new block-level anchor URI.
176    #[must_use]
177    pub fn block(block_id: impl Into<String>) -> Self {
178        Self {
179            block_id: block_id.into(),
180            offset: None,
181            start: None,
182            end: None,
183        }
184    }
185
186    /// Create a new point anchor URI.
187    #[must_use]
188    pub fn point(block_id: impl Into<String>, offset: usize) -> Self {
189        Self {
190            block_id: block_id.into(),
191            offset: Some(offset),
192            start: None,
193            end: None,
194        }
195    }
196
197    /// Create a new range anchor URI.
198    #[must_use]
199    pub fn range(block_id: impl Into<String>, start: usize, end: usize) -> Self {
200        Self {
201            block_id: block_id.into(),
202            offset: None,
203            start: Some(start),
204            end: Some(end),
205        }
206    }
207}
208
209impl FromStr for ContentAnchorUri {
210    type Err = crate::Error;
211
212    fn from_str(s: &str) -> Result<Self, Self::Err> {
213        // Must start with #
214        let s = s
215            .strip_prefix('#')
216            .ok_or_else(|| crate::Error::InvalidManifest {
217                reason: format!("Anchor URI must start with '#': {s}"),
218            })?;
219
220        // Check if there's a position specifier
221        if let Some((block_id, position)) = s.split_once('/') {
222            if block_id.is_empty() {
223                return Err(crate::Error::InvalidManifest {
224                    reason: "Anchor URI block ID cannot be empty".to_string(),
225                });
226            }
227
228            // Check if it's a range (contains '-')
229            if let Some((start_str, end_str)) = position.split_once('-') {
230                let start =
231                    start_str
232                        .parse::<usize>()
233                        .map_err(|_| crate::Error::InvalidManifest {
234                            reason: format!("Invalid range start in anchor URI: {start_str}"),
235                        })?;
236                let end = end_str
237                    .parse::<usize>()
238                    .map_err(|_| crate::Error::InvalidManifest {
239                        reason: format!("Invalid range end in anchor URI: {end_str}"),
240                    })?;
241
242                Ok(Self::range(block_id, start, end))
243            } else {
244                // Point anchor
245                let offset =
246                    position
247                        .parse::<usize>()
248                        .map_err(|_| crate::Error::InvalidManifest {
249                            reason: format!("Invalid offset in anchor URI: {position}"),
250                        })?;
251
252                Ok(Self::point(block_id, offset))
253            }
254        } else {
255            // Block-level anchor
256            if s.is_empty() {
257                return Err(crate::Error::InvalidManifest {
258                    reason: "Anchor URI block ID cannot be empty".to_string(),
259                });
260            }
261            Ok(Self::block(s))
262        }
263    }
264}
265
266impl fmt::Display for ContentAnchorUri {
267    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
268        write!(f, "#{}", self.block_id)?;
269
270        if let Some(offset) = self.offset {
271            write!(f, "/{offset}")?;
272        } else if let (Some(start), Some(end)) = (self.start, self.end) {
273            write!(f, "/{start}-{end}")?;
274        }
275
276        Ok(())
277    }
278}
279
280impl From<ContentAnchor> for ContentAnchorUri {
281    fn from(anchor: ContentAnchor) -> Self {
282        Self {
283            block_id: anchor.block_id,
284            offset: anchor.offset,
285            start: anchor.start,
286            end: anchor.end,
287        }
288    }
289}
290
#[cfg(test)]
mod tests {
    use super::*;

    /// A block anchor carries only the block ID.
    #[test]
    fn test_block_anchor() {
        let anchor = ContentAnchor::block("para-1");
        assert_eq!(anchor.block_id, "para-1");
        assert!(anchor.is_block_anchor());
        assert!(!anchor.is_point_anchor());
        assert!(!anchor.is_range_anchor());
    }

    /// A point anchor carries an offset and no range.
    #[test]
    fn test_point_anchor() {
        let anchor = ContentAnchor::point("para-1", 15);
        assert_eq!(anchor.block_id, "para-1");
        assert_eq!(anchor.offset, Some(15));
        assert!(!anchor.is_block_anchor());
        assert!(anchor.is_point_anchor());
        assert!(!anchor.is_range_anchor());
    }

    /// A range anchor carries start and end and no offset.
    #[test]
    fn test_range_anchor() {
        let anchor = ContentAnchor::range("para-1", 10, 25);
        assert_eq!(anchor.block_id, "para-1");
        assert_eq!(anchor.start, Some(10));
        assert_eq!(anchor.end, Some(25));
        assert!(!anchor.is_block_anchor());
        assert!(!anchor.is_point_anchor());
        assert!(anchor.is_range_anchor());
    }

    #[test]
    fn test_anchor_uri_parse_block() {
        let uri: ContentAnchorUri = "#blockId".parse().unwrap();
        assert_eq!(uri.block_id, "blockId");
        assert!(uri.offset.is_none());
        assert!(uri.start.is_none());
        assert!(uri.end.is_none());
    }

    #[test]
    fn test_anchor_uri_parse_point() {
        let uri: ContentAnchorUri = "#blockId/15".parse().unwrap();
        assert_eq!(uri.block_id, "blockId");
        assert_eq!(uri.offset, Some(15));
        assert!(uri.start.is_none());
        assert!(uri.end.is_none());
    }

    #[test]
    fn test_anchor_uri_parse_range() {
        let uri: ContentAnchorUri = "#blockId/10-25".parse().unwrap();
        assert_eq!(uri.block_id, "blockId");
        assert!(uri.offset.is_none());
        assert_eq!(uri.start, Some(10));
        assert_eq!(uri.end, Some(25));
    }

    #[test]
    fn test_anchor_uri_display() {
        let block_uri = ContentAnchorUri::block("para-1");
        assert_eq!(block_uri.to_string(), "#para-1");

        let point_uri = ContentAnchorUri::point("para-1", 15);
        assert_eq!(point_uri.to_string(), "#para-1/15");

        let range_uri = ContentAnchorUri::range("para-1", 10, 25);
        assert_eq!(range_uri.to_string(), "#para-1/10-25");
    }

    /// Parsing and re-formatting a canonical URI yields the same string.
    #[test]
    fn test_anchor_uri_roundtrip() {
        let cases = [
            "#block-1",
            "#para-1/15",
            "#heading-2/10-25",
            "#complex-id-123/0-100",
        ];

        for case in cases {
            let uri: ContentAnchorUri = case.parse().unwrap();
            assert_eq!(uri.to_string(), case);
        }
    }

    #[test]
    fn test_anchor_to_uri_conversion() {
        let anchor = ContentAnchor::range("para-1", 10, 25);
        let uri = anchor.to_uri();
        assert_eq!(uri.to_string(), "#para-1/10-25");

        // And back
        let anchor2 = ContentAnchor::from(uri);
        assert_eq!(anchor2.block_id, "para-1");
        assert_eq!(anchor2.start, Some(10));
        assert_eq!(anchor2.end, Some(25));
    }

    #[test]
    fn test_anchor_uri_parse_errors() {
        // Missing #
        assert!("blockId".parse::<ContentAnchorUri>().is_err());

        // Empty block ID
        assert!("#".parse::<ContentAnchorUri>().is_err());
        assert!("#/15".parse::<ContentAnchorUri>().is_err());

        // Invalid offset
        assert!("#blockId/abc".parse::<ContentAnchorUri>().is_err());

        // Invalid range
        assert!("#blockId/abc-25".parse::<ContentAnchorUri>().is_err());
        assert!("#blockId/10-def".parse::<ContentAnchorUri>().is_err());
    }

    /// JSON uses camelCase keys and round-trips to an equal value.
    #[test]
    fn test_anchor_serialization() {
        let anchor = ContentAnchor::range("para-1", 10, 25);
        let json = serde_json::to_string(&anchor).unwrap();
        assert!(json.contains("\"blockId\":\"para-1\""));
        assert!(json.contains("\"start\":10"));
        assert!(json.contains("\"end\":25"));

        let parsed: ContentAnchor = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, anchor);
    }

    #[test]
    fn test_anchor_with_content_hash() {
        let hash = crate::Hasher::hash(crate::HashAlgorithm::Sha256, b"test content");
        let anchor = ContentAnchor::block("para-1").with_content_hash(hash.clone());
        assert_eq!(anchor.content_hash, Some(hash));
    }
}