turbovault_core/
models.rs

1//! Core data models representing Obsidian vault elements.
2//!
3//! These types are designed to be:
4//! - **Serializable**: All types derive Serialize/Deserialize
5//! - **Debuggable**: Derive Debug for easy inspection
6//! - **Cloneable**: `Arc<T>` friendly for shared ownership
7//! - **Type-Safe**: Enums replace magic strings
8//!
9//! The types roughly correspond to Python dataclasses in the reference implementation.
10
11use serde::{Deserialize, Serialize};
12use std::collections::{HashMap, HashSet};
13use std::path::PathBuf;
14
/// Position in source text (line, column, byte offset)
///
/// Describes where a parsed element starts and how long it is. Values built
/// by `from_offset` / `from_offset_indexed` use 1-based lines and columns,
/// while `start()` yields an all-zero position.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct SourcePosition {
    /// Line number (1-based when computed from an offset; 0 for `start()`).
    pub line: usize,
    /// Column within the line (1-based when computed from an offset, and
    /// derived from byte offsets rather than character counts).
    pub column: usize,
    /// Byte offset of the element from the start of the content.
    pub offset: usize,
    /// Length of the element; presumably measured in bytes like `offset` — TODO confirm.
    pub length: usize,
}
23
24impl SourcePosition {
25    /// Create a new source position
26    pub fn new(line: usize, column: usize, offset: usize, length: usize) -> Self {
27        Self {
28            line,
29            column,
30            offset,
31            length,
32        }
33    }
34
35    /// Create position at start
36    pub fn start() -> Self {
37        Self {
38            line: 0,
39            column: 0,
40            offset: 0,
41            length: 0,
42        }
43    }
44
45    /// Create position from byte offset by computing line and column.
46    ///
47    /// This is O(n) where n is the offset - suitable for single-use cases.
48    /// For bulk operations, use `from_offset_indexed` with a pre-computed `LineIndex`.
49    ///
50    /// Line numbers start at 1, column numbers start at 1.
51    pub fn from_offset(content: &str, offset: usize, length: usize) -> Self {
52        let before = &content[..offset.min(content.len())];
53        let line = before.matches('\n').count() + 1;
54        let column = before
55            .rfind('\n')
56            .map(|pos| offset - pos)
57            .unwrap_or(offset + 1);
58
59        Self {
60            line,
61            column,
62            offset,
63            length,
64        }
65    }
66
67    /// Create position from byte offset using a pre-computed line index.
68    ///
69    /// This is O(log n) - use for bulk parsing operations.
70    pub fn from_offset_indexed(index: &LineIndex, offset: usize, length: usize) -> Self {
71        let (line, column) = index.line_col(offset);
72        Self {
73            line,
74            column,
75            offset,
76            length,
77        }
78    }
79}
80
/// Table of line-start offsets enabling O(log n) line/column lookup.
///
/// Construct it once for a document and reuse it for every position lookup;
/// this avoids rescanning the text for each parsed element.
///
/// # Example
/// ```
/// use turbovault_core::{LineIndex, SourcePosition};
///
/// let content = "Line 1\nLine 2\nLine 3";
/// let index = LineIndex::new(content);
///
/// // O(log n) lookup instead of O(n)
/// let pos = SourcePosition::from_offset_indexed(&index, 7, 6);
/// assert_eq!(pos.line, 2);
/// assert_eq!(pos.column, 1);
/// ```
#[derive(Debug, Clone)]
pub struct LineIndex {
    /// Byte offset of the first byte of each line; entry 0 belongs to line 1.
    line_starts: Vec<usize>,
}

impl LineIndex {
    /// Scan `content` once (O(n)) and record where every line begins.
    ///
    /// The first line always starts at offset 0; each `\n` starts a new line
    /// at the byte immediately following it.
    pub fn new(content: &str) -> Self {
        let line_starts = std::iter::once(0)
            .chain(content.match_indices('\n').map(|(newline_at, _)| newline_at + 1))
            .collect();
        Self { line_starts }
    }

    /// Resolve a byte offset to `(line, column)` with a binary search.
    ///
    /// Both line and column are 1-based. Offsets beyond the last line start
    /// are attributed to the last line.
    pub fn line_col(&self, offset: usize) -> (usize, usize) {
        // Number of lines whose start is at or before `offset`; the last of
        // those is the line that contains the offset.
        let lines_before_or_at = self.line_starts.partition_point(|&start| start <= offset);
        let containing_idx = lines_before_or_at.saturating_sub(1);
        let line_start = match self.line_starts.get(containing_idx) {
            Some(&start) => start,
            None => 0,
        };
        (lines_before_or_at.max(1), offset - line_start + 1)
    }

    /// Byte offset at which the given 1-based `line` begins.
    ///
    /// Returns `None` for line 0 and for lines past the end of the document.
    pub fn line_start(&self, line: usize) -> Option<usize> {
        let idx = line.checked_sub(1)?;
        self.line_starts.get(idx).copied()
    }

    /// Number of lines recorded in the index (always at least 1).
    pub fn line_count(&self) -> usize {
        self.line_starts.len()
    }
}
145
/// Type of link in Obsidian content
///
/// A plain, copyable discriminator stored on every `Link`. It derives `Hash`
/// and `Eq`, so it can be used as a map key or set member.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum LinkType {
    /// Wikilink: `[[Note]]`
    WikiLink,
    /// Embedded note: `![[Note]]`
    Embed,
    /// Block reference: `[[Note#^block]]`
    BlockRef,
    /// Heading reference: `[[Note#Heading]]` or `file.md#section`
    HeadingRef,
    /// Same-document anchor: `#section` (no file reference)
    Anchor,
    /// Markdown link: `[text](url)` to relative file
    MarkdownLink,
    /// External URL: `http://...`, `https://...`, `mailto:...`
    ExternalLink,
}
164
/// A link in vault content
///
/// Derives `Eq` and `Hash` over all fields, which is what allows links to be
/// stored in a `HashSet` (see `VaultFile::backlinks`).
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub struct Link {
    /// Kind of link (trailing underscore because `type` is a Rust keyword).
    pub type_: LinkType,
    /// File associated with the link as its source; presumably the file in
    /// which the link text appears — verify against the parser.
    pub source_file: PathBuf,
    /// Link target as a string; presumably the raw, unresolved target.
    pub target: String,
    /// Optional display text; `Link::new` leaves this as `None`.
    pub display_text: Option<String>,
    /// Where the link is located in the source text.
    pub position: SourcePosition,
    /// Resolved path of the target, if any; `Link::new` leaves this as `None`.
    pub resolved_target: Option<PathBuf>,
    /// Validity flag; `Link::new` initializes it to `true`.
    pub is_valid: bool,
}
176
177impl Link {
178    /// Create a new link
179    pub fn new(
180        type_: LinkType,
181        source_file: PathBuf,
182        target: String,
183        position: SourcePosition,
184    ) -> Self {
185        Self {
186            type_,
187            source_file,
188            target,
189            display_text: None,
190            position,
191            resolved_target: None,
192            is_valid: true,
193        }
194    }
195}
196
/// A heading in vault content
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Heading {
    /// Heading text.
    pub text: String,
    /// Heading level; intended range is 1-6 (not enforced by the type).
    pub level: u8, // 1-6
    /// Where the heading is located in the source text.
    pub position: SourcePosition,
    /// Optional anchor associated with the heading; presumably the slug used
    /// for `#heading` references — TODO confirm.
    pub anchor: Option<String>,
}
205
/// A tag in vault content
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tag {
    /// Tag name; `VaultFile::has_tag` compares this for exact equality.
    pub name: String,
    /// Where the tag is located in the source text.
    pub position: SourcePosition,
    /// Whether the tag is hierarchical, e.g. `#parent/child`.
    pub is_nested: bool, // #parent/child
}
213
/// A task item in vault content
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaskItem {
    /// Text of the task.
    pub content: String,
    /// Whether the task is marked as completed.
    pub is_completed: bool,
    /// Where the task is located in the source text.
    pub position: SourcePosition,
    /// Optional due date kept as an unparsed string; the format is not
    /// defined here — verify against the parser.
    pub due_date: Option<String>,
}
222
/// Type of callout block
///
/// A closed set of callout kinds; there is no catch-all variant for unknown
/// or custom types. The variant names presumably mirror Obsidian's built-in
/// callout identifiers — confirm against the parser.
///
/// No serde renaming is applied, so variants serialize under their Rust names
/// (e.g. `"Note"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CalloutType {
    Note,
    Tip,
    Info,
    Todo,
    Important,
    Success,
    Question,
    Warning,
    Failure,
    Danger,
    Bug,
    Example,
    Quote,
}
240
/// A callout block in vault content
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Callout {
    /// Kind of callout (trailing underscore because `type` is a Rust keyword).
    pub type_: CalloutType,
    /// Optional title of the callout.
    pub title: Option<String>,
    /// Body text of the callout.
    pub content: String,
    /// Where the callout is located in the source text.
    pub position: SourcePosition,
    /// Whether the callout is foldable.
    pub is_foldable: bool,
}
250
/// A block in vault content (Obsidian block reference with ^id)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Block {
    /// Text of the block.
    pub content: String,
    /// Block identifier, if the block has one; `VaultFile::blocks_with_ids`
    /// indexes blocks by this value and skips blocks where it is `None`.
    pub block_id: Option<String>,
    /// Where the block is located in the source text.
    pub position: SourcePosition,
    /// Free-form block kind stored as a string (paragraph, heading,
    /// list_item, etc.); not restricted by an enum.
    pub type_: String, // paragraph, heading, list_item, etc.
}
259
260// ============================================================================
261// Content Block Types (for full markdown parsing)
262// ============================================================================
263
/// A parsed content block in a markdown document.
///
/// These represent the block-level structure of markdown content,
/// similar to an AST but optimized for consumption by tools like treemd.
///
/// Serialization uses an internal `"type"` tag whose value is the lowercased
/// variant name (for example `HorizontalRule` becomes `"horizontalrule"`);
/// the variant's fields sit alongside that tag in the same object.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ContentBlock {
    /// A heading (# H1, ## H2, etc.)
    ///
    /// Note that `level` is a `usize` here, whereas the standalone `Heading`
    /// struct stores its level as `u8`.
    Heading {
        level: usize,
        content: String,
        inline: Vec<InlineElement>,
        anchor: Option<String>,
    },
    /// A paragraph of text
    Paragraph {
        content: String,
        inline: Vec<InlineElement>,
    },
    /// A fenced or indented code block
    Code {
        language: Option<String>,
        content: String,
        start_line: usize,
        end_line: usize,
    },
    /// An ordered or unordered list
    List { ordered: bool, items: Vec<ListItem> },
    /// A blockquote (> text)
    ///
    /// Carries both a `content` string and nested `blocks`, so blockquotes
    /// can contain further block-level structure.
    Blockquote {
        content: String,
        blocks: Vec<ContentBlock>,
    },
    /// A table with headers and rows
    Table {
        headers: Vec<String>,
        alignments: Vec<TableAlignment>,
        rows: Vec<Vec<String>>,
    },
    /// An image (standalone, not inline)
    Image {
        alt: String,
        src: String,
        title: Option<String>,
    },
    /// A horizontal rule (---, ***, ___)
    HorizontalRule,
    /// HTML <details><summary> block
    Details {
        summary: String,
        content: String,
        blocks: Vec<ContentBlock>,
    },
}
318
/// An inline element within a block.
///
/// These represent inline formatting and links within text content.
///
/// Serialization uses an internal `"type"` tag whose value is the lowercased
/// variant name (for example `Strikethrough` becomes `"strikethrough"`).
/// Formatting variants hold a single flat `value` string rather than nested
/// inline elements.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum InlineElement {
    /// Plain text
    Text { value: String },
    /// Bold text (**text** or __text__)
    Strong { value: String },
    /// Italic text (*text* or _text_)
    Emphasis { value: String },
    /// Inline code (`code`)
    Code { value: String },
    /// A link [text](url)
    Link {
        text: String,
        url: String,
        title: Option<String>,
    },
    /// An inline image ![alt](src)
    Image {
        alt: String,
        src: String,
        title: Option<String>,
    },
    /// Strikethrough text (~~text~~)
    Strikethrough { value: String },
}
348
/// A list item with optional checkbox and nested content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ListItem {
    /// For task lists: Some(true) = checked, Some(false) = unchecked, None = not a task
    pub checked: Option<bool>,
    /// Raw text content of the item
    pub content: String,
    /// Parsed inline elements
    pub inline: Vec<InlineElement>,
    /// Nested blocks (e.g., code blocks, sub-lists inside list items)
    ///
    /// Omitted from serialized output when empty, and defaults to an empty
    /// vector when the field is absent during deserialization.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub blocks: Vec<ContentBlock>,
}
362
/// Table column alignment.
///
/// Serialized as the lowercased variant name (`"left"`, `"center"`,
/// `"right"`, `"none"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum TableAlignment {
    /// Left-aligned column.
    Left,
    /// Centered column.
    Center,
    /// Right-aligned column.
    Right,
    /// No alignment; this is the enum variant `TableAlignment::None`,
    /// unrelated to `Option::None`.
    None,
}
372
/// YAML frontmatter
///
/// The parsed key/value pairs are stored as `serde_json::Value`s, so
/// consumers see the data in JSON form regardless of the source syntax.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Frontmatter {
    /// Frontmatter entries keyed by property name.
    pub data: HashMap<String, serde_json::Value>,
    /// Where the frontmatter is located in the source text.
    pub position: SourcePosition,
}
379
380impl Frontmatter {
381    /// Extract tags from frontmatter
382    pub fn tags(&self) -> Vec<String> {
383        match self.data.get("tags") {
384            Some(serde_json::Value::String(s)) => vec![s.clone()],
385            Some(serde_json::Value::Array(arr)) => arr
386                .iter()
387                .filter_map(|v| v.as_str().map(|s| s.to_string()))
388                .collect(),
389            _ => vec![],
390        }
391    }
392
393    /// Extract aliases from frontmatter
394    pub fn aliases(&self) -> Vec<String> {
395        match self.data.get("aliases") {
396            Some(serde_json::Value::String(s)) => vec![s.clone()],
397            Some(serde_json::Value::Array(arr)) => arr
398                .iter()
399                .filter_map(|v| v.as_str().map(|s| s.to_string()))
400                .collect(),
401            _ => vec![],
402        }
403    }
404}
405
/// File metadata
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Path of the file.
    pub path: PathBuf,
    /// File size; presumably in bytes — TODO confirm.
    pub size: u64,
    /// Creation time as a floating-point number; presumably seconds since
    /// the Unix epoch — verify against the code that fills it.
    pub created_at: f64,
    /// Modification time, in the same representation as `created_at`.
    pub modified_at: f64,
    /// Checksum of the file as a string; the algorithm is not specified here.
    pub checksum: String,
    /// Whether the file is treated as an attachment.
    pub is_attachment: bool,
}
416
/// A complete vault file with parsed content
///
/// `VaultFile::new` fills in `path`, `content` and `metadata` and leaves all
/// parsed collections empty with `is_parsed` set to `false`; the remaining
/// fields are expected to be populated by whatever performs the parsing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VaultFile {
    /// Path of the file.
    pub path: PathBuf,
    /// Full text content of the file.
    pub content: String,
    /// File-level metadata.
    pub metadata: FileMetadata,

    // Parsed elements
    /// Frontmatter, if present; also consulted by `has_tag`.
    pub frontmatter: Option<Frontmatter>,
    /// Headings found in the file.
    pub headings: Vec<Heading>,
    /// Links found in the file.
    pub links: Vec<Link>,
    /// Links stored as a set (hence `Link: Eq + Hash`); presumably links
    /// from other files that point at this one — TODO confirm.
    pub backlinks: HashSet<Link>,
    /// Blocks found in the file.
    pub blocks: Vec<Block>,
    /// Inline tags found in the file.
    pub tags: Vec<Tag>,
    /// Callouts found in the file.
    pub callouts: Vec<Callout>,
    /// Task items found in the file.
    pub tasks: Vec<TaskItem>,

    // Cache status
    /// Whether the file has been parsed; `false` after `VaultFile::new`.
    pub is_parsed: bool,
    /// Error message from parsing, if any.
    pub parse_error: Option<String>,
    /// Time of the last parse as a floating-point number; presumably a Unix
    /// timestamp like the `FileMetadata` times — TODO confirm.
    pub last_parsed: Option<f64>,
}
439
440impl VaultFile {
441    /// Create a new vault file
442    pub fn new(path: PathBuf, content: String, metadata: FileMetadata) -> Self {
443        Self {
444            path,
445            content,
446            metadata,
447            frontmatter: None,
448            headings: vec![],
449            links: vec![],
450            backlinks: HashSet::new(),
451            blocks: vec![],
452            tags: vec![],
453            callouts: vec![],
454            tasks: vec![],
455            is_parsed: false,
456            parse_error: None,
457            last_parsed: None,
458        }
459    }
460
461    /// Get outgoing links
462    pub fn outgoing_links(&self) -> HashSet<&str> {
463        self.links
464            .iter()
465            .filter(|link| matches!(link.type_, LinkType::WikiLink | LinkType::Embed))
466            .map(|link| link.target.as_str())
467            .collect()
468    }
469
470    /// Get headings indexed by text
471    pub fn headings_by_text(&self) -> HashMap<&str, &Heading> {
472        self.headings.iter().map(|h| (h.text.as_str(), h)).collect()
473    }
474
475    /// Get blocks with IDs
476    pub fn blocks_with_ids(&self) -> HashMap<&str, &Block> {
477        self.blocks
478            .iter()
479            .filter_map(|b| b.block_id.as_deref().map(|id| (id, b)))
480            .collect()
481    }
482
483    /// Check if file contains a tag
484    pub fn has_tag(&self, tag: &str) -> bool {
485        if let Some(fm) = &self.frontmatter
486            && fm.tags().contains(&tag.to_string())
487        {
488            return true;
489        }
490
491        self.tags.iter().any(|t| t.name == tag)
492    }
493}
494
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-line fixture shared by the `LineIndex` tests.
    const THREE_LINES: &str = "Line 1\nLine 2\nLine 3";
    /// Fixture with `[[Link]]` on the second line, starting at byte offset 14.
    const WITH_LINK: &str = "Line 1\nLine 2 [[Link]] here\nLine 3";

    /// Check `index.line_col(offset)` against each `(offset, (line, column))` row.
    fn assert_line_cols(index: &LineIndex, cases: &[(usize, (usize, usize))]) {
        for &(offset, expected) in cases {
            assert_eq!(index.line_col(offset), expected, "offset {offset}");
        }
    }

    /// Check all four components of a position at once.
    fn assert_position(pos: SourcePosition, expected: (usize, usize, usize, usize)) {
        assert_eq!((pos.line, pos.column, pos.offset, pos.length), expected);
    }

    #[test]
    fn test_source_position() {
        assert_position(SourcePosition::new(5, 10, 100, 20), (5, 10, 100, 20));
    }

    #[test]
    fn test_frontmatter_tags() {
        let tag_values = serde_json::Value::Array(vec!["rust".into(), "mcp".into()]);
        let fm = Frontmatter {
            data: HashMap::from([("tags".to_string(), tag_values)]),
            position: SourcePosition::start(),
        };

        let tags = fm.tags();
        assert_eq!(tags.len(), 2);
        assert!(tags.contains(&"rust".to_string()));
    }

    #[test]
    fn test_line_index_single_line() {
        let index = LineIndex::new("Hello, world!");

        assert_eq!(index.line_count(), 1);
        assert_line_cols(
            &index,
            &[
                (0, (1, 1)), // 'H'
                (7, (1, 8)), // 'w'
            ],
        );
    }

    #[test]
    fn test_line_index_multiline() {
        let index = LineIndex::new(THREE_LINES);

        assert_eq!(index.line_count(), 3);
        assert_line_cols(
            &index,
            &[
                (0, (1, 1)),  // 'L' of Line 1
                (5, (1, 6)),  // '1'
                (7, (2, 1)),  // 'L' of Line 2 (first byte after the newline)
                (13, (2, 7)), // the newline ending line 2
                (14, (3, 1)), // 'L' of Line 3 (first byte after the second newline)
            ],
        );
    }

    #[test]
    fn test_line_index_line_start() {
        let index = LineIndex::new(THREE_LINES);

        let cases = [
            (1, Some(0)),
            (2, Some(7)),
            (3, Some(14)),
            (0, None), // Invalid line
            (4, None), // Beyond content
        ];
        for (line, expected) in cases {
            assert_eq!(index.line_start(line), expected, "line {line}");
        }
    }

    #[test]
    fn test_source_position_from_offset() {
        // "Line 2 " is 7 bytes, so `[[Link]]` sits at column 8 of line 2.
        let pos = SourcePosition::from_offset(WITH_LINK, 14, 8);
        assert_position(pos, (2, 8, 14, 8));
    }

    #[test]
    fn test_source_position_from_offset_indexed() {
        // Same expectation as the direct lookup, via the pre-computed index.
        let index = LineIndex::new(WITH_LINK);
        let pos = SourcePosition::from_offset_indexed(&index, 14, 8);
        assert_position(pos, (2, 8, 14, 8));
    }

    #[test]
    fn test_source_position_first_line() {
        let pos = SourcePosition::from_offset("[[Link]] at start", 0, 8);
        assert_eq!((pos.line, pos.column), (1, 1));
    }

    #[test]
    fn test_line_index_empty_content() {
        let index = LineIndex::new("");

        assert_eq!(index.line_count(), 1); // Even empty content has "line 1"
        assert_line_cols(&index, &[(0, (1, 1))]);
    }

    #[test]
    fn test_line_index_trailing_newline() {
        let index = LineIndex::new("Line 1\n");

        assert_eq!(index.line_count(), 2); // Line 1 + empty line 2
        assert_line_cols(
            &index,
            &[
                (6, (1, 7)), // The newline itself
                (7, (2, 1)), // After newline
            ],
        );
    }
}