turbovault_core/
models.rs

1//! Core data models representing Obsidian vault elements.
2//!
3//! These types are designed to be:
4//! - **Serializable**: All types derive Serialize/Deserialize
5//! - **Debuggable**: Derive Debug for easy inspection
6//! - **Cloneable**: `Arc<T>` friendly for shared ownership
7//! - **Type-Safe**: Enums replace magic strings
8//!
9//! The types roughly correspond to Python dataclasses in the reference implementation.
10
11use serde::{Deserialize, Serialize};
12use std::collections::{HashMap, HashSet};
13use std::path::PathBuf;
14
15/// Position in source text (line, column, byte offset)
16#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
17pub struct SourcePosition {
18    pub line: usize,
19    pub column: usize,
20    pub offset: usize,
21    pub length: usize,
22}
23
24impl SourcePosition {
25    /// Create a new source position
26    pub fn new(line: usize, column: usize, offset: usize, length: usize) -> Self {
27        Self {
28            line,
29            column,
30            offset,
31            length,
32        }
33    }
34
35    /// Create position at start
36    pub fn start() -> Self {
37        Self {
38            line: 0,
39            column: 0,
40            offset: 0,
41            length: 0,
42        }
43    }
44
45    /// Create position from byte offset by computing line and column.
46    ///
47    /// This is O(n) where n is the offset - suitable for single-use cases.
48    /// For bulk operations, use `from_offset_indexed` with a pre-computed `LineIndex`.
49    ///
50    /// Line numbers start at 1, column numbers start at 1.
51    pub fn from_offset(content: &str, offset: usize, length: usize) -> Self {
52        let before = &content[..offset.min(content.len())];
53        let line = before.matches('\n').count() + 1;
54        let column = before
55            .rfind('\n')
56            .map(|pos| offset - pos)
57            .unwrap_or(offset + 1);
58
59        Self {
60            line,
61            column,
62            offset,
63            length,
64        }
65    }
66
67    /// Create position from byte offset using a pre-computed line index.
68    ///
69    /// This is O(log n) - use for bulk parsing operations.
70    pub fn from_offset_indexed(index: &LineIndex, offset: usize, length: usize) -> Self {
71        let (line, column) = index.line_col(offset);
72        Self {
73            line,
74            column,
75            offset,
76            length,
77        }
78    }
79}
80
/// Table of line-start byte offsets that answers line/column queries in O(log n).
///
/// Construct it once per document and reuse it for every position lookup; this
/// avoids rescanning the text for each of the many OFM elements a parser may find.
///
/// # Example
/// ```
/// use turbovault_core::{LineIndex, SourcePosition};
///
/// let content = "Line 1\nLine 2\nLine 3";
/// let index = LineIndex::new(content);
///
/// // O(log n) lookup instead of O(n)
/// let pos = SourcePosition::from_offset_indexed(&index, 7, 6);
/// assert_eq!(pos.line, 2);
/// assert_eq!(pos.column, 1);
/// ```
#[derive(Debug, Clone)]
pub struct LineIndex {
    /// Byte offset of the first byte of every line; entry 0 belongs to line 1.
    line_starts: Vec<usize>,
}

impl LineIndex {
    /// Scan `content` once (O(n)) and record where each line begins.
    ///
    /// Line 1 always starts at offset 0; every `'\n'` opens a new line at the
    /// byte immediately following it.
    pub fn new(content: &str) -> Self {
        let after_each_newline = content.match_indices('\n').map(|(at, _)| at + 1);
        let line_starts = std::iter::once(0).chain(after_each_newline).collect();
        Self { line_starts }
    }

    /// Resolve a byte offset to `(line, column)` with a binary search.
    ///
    /// Both numbers are 1-based. The column is `offset` minus the start of the
    /// containing line, plus one, so offsets beyond the end of the text are
    /// attributed to the last line.
    pub fn line_col(&self, offset: usize) -> (usize, usize) {
        // Count of line starts at or before `offset`; the last of them is our line.
        let starts_not_after = self.line_starts.partition_point(|&start| start <= offset);
        let slot = starts_not_after.saturating_sub(1);
        let line_start = self.line_starts.get(slot).copied().unwrap_or(0);

        (starts_not_after.max(1), offset - line_start + 1)
    }

    /// Byte offset at which the given 1-based `line` begins.
    ///
    /// Returns `None` for line 0 and for lines beyond the end of the index.
    pub fn line_start(&self, line: usize) -> Option<usize> {
        line.checked_sub(1)
            .and_then(|slot| self.line_starts.get(slot).copied())
    }

    /// Number of lines recorded in the index (always at least one).
    pub fn line_count(&self) -> usize {
        self.line_starts.len()
    }
}
145
146/// Type of link in Obsidian content
147#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
148pub enum LinkType {
149    /// Wikilink: `[[Note]]`
150    WikiLink,
151    /// Embedded note: `![[Note]]`
152    Embed,
153    /// Block reference: `[[Note#^block]]`
154    BlockRef,
155    /// Heading reference: `[[Note#Heading]]` or `file.md#section`
156    HeadingRef,
157    /// Same-document anchor: `#section` (no file reference)
158    Anchor,
159    /// Markdown link: `[text](url)` to relative file
160    MarkdownLink,
161    /// External URL: `http://...`, `https://...`, `mailto:...`
162    ExternalLink,
163}
164
165/// A link in vault content
166#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
167pub struct Link {
168    pub type_: LinkType,
169    pub source_file: PathBuf,
170    pub target: String,
171    pub display_text: Option<String>,
172    pub position: SourcePosition,
173    pub resolved_target: Option<PathBuf>,
174    pub is_valid: bool,
175}
176
177impl Link {
178    /// Create a new link
179    pub fn new(
180        type_: LinkType,
181        source_file: PathBuf,
182        target: String,
183        position: SourcePosition,
184    ) -> Self {
185        Self {
186            type_,
187            source_file,
188            target,
189            display_text: None,
190            position,
191            resolved_target: None,
192            is_valid: true,
193        }
194    }
195}
196
197/// A heading in vault content
198#[derive(Debug, Clone, Serialize, Deserialize)]
199pub struct Heading {
200    pub text: String,
201    pub level: u8, // 1-6
202    pub position: SourcePosition,
203    pub anchor: Option<String>,
204}
205
206/// A tag in vault content
207#[derive(Debug, Clone, Serialize, Deserialize)]
208pub struct Tag {
209    pub name: String,
210    pub position: SourcePosition,
211    pub is_nested: bool, // #parent/child
212}
213
214/// A task item in vault content
215#[derive(Debug, Clone, Serialize, Deserialize)]
216pub struct TaskItem {
217    pub content: String,
218    pub is_completed: bool,
219    pub position: SourcePosition,
220    pub due_date: Option<String>,
221}
222
223/// Type of callout block
224#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
225pub enum CalloutType {
226    Note,
227    Tip,
228    Info,
229    Todo,
230    Important,
231    Success,
232    Question,
233    Warning,
234    Failure,
235    Danger,
236    Bug,
237    Example,
238    Quote,
239}
240
241/// A callout block in vault content
242#[derive(Debug, Clone, Serialize, Deserialize)]
243pub struct Callout {
244    pub type_: CalloutType,
245    pub title: Option<String>,
246    pub content: String,
247    pub position: SourcePosition,
248    pub is_foldable: bool,
249}
250
251/// A block in vault content (Obsidian block reference with ^id)
252#[derive(Debug, Clone, Serialize, Deserialize)]
253pub struct Block {
254    pub content: String,
255    pub block_id: Option<String>,
256    pub position: SourcePosition,
257    pub type_: String, // paragraph, heading, list_item, etc.
258}
259
260// ============================================================================
261// Content Block Types (for full markdown parsing)
262// ============================================================================
263
264/// A parsed content block in a markdown document.
265///
266/// These represent the block-level structure of markdown content,
267/// similar to an AST but optimized for consumption by tools like treemd.
268#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
269#[serde(tag = "type", rename_all = "lowercase")]
270pub enum ContentBlock {
271    /// A heading (# H1, ## H2, etc.)
272    Heading {
273        level: usize,
274        content: String,
275        inline: Vec<InlineElement>,
276        anchor: Option<String>,
277    },
278    /// A paragraph of text
279    Paragraph {
280        content: String,
281        inline: Vec<InlineElement>,
282    },
283    /// A fenced or indented code block
284    Code {
285        language: Option<String>,
286        content: String,
287        start_line: usize,
288        end_line: usize,
289    },
290    /// An ordered or unordered list
291    List { ordered: bool, items: Vec<ListItem> },
292    /// A blockquote (> text)
293    Blockquote {
294        content: String,
295        blocks: Vec<ContentBlock>,
296    },
297    /// A table with headers and rows
298    Table {
299        headers: Vec<String>,
300        alignments: Vec<TableAlignment>,
301        rows: Vec<Vec<String>>,
302    },
303    /// An image (standalone, not inline)
304    Image {
305        alt: String,
306        src: String,
307        title: Option<String>,
308    },
309    /// A horizontal rule (---, ***, ___)
310    HorizontalRule,
311    /// HTML <details><summary> block
312    Details {
313        summary: String,
314        content: String,
315        blocks: Vec<ContentBlock>,
316    },
317}
318
319impl ContentBlock {
320    /// Extract plain text from this content block.
321    ///
322    /// Returns only the visible text content, stripping markdown syntax.
323    /// This is useful for search indexing, accessibility, and accurate word counts.
324    ///
325    /// # Example
326    /// ```
327    /// use turbovault_core::{ContentBlock, InlineElement};
328    ///
329    /// let block = ContentBlock::Paragraph {
330    ///     content: "[Overview](#overview) and **bold**".to_string(),
331    ///     inline: vec![
332    ///         InlineElement::Link {
333    ///             text: "Overview".to_string(),
334    ///             url: "#overview".to_string(),
335    ///             title: None,
336    ///             line_offset: None,
337    ///         },
338    ///         InlineElement::Text { value: " and ".to_string() },
339    ///         InlineElement::Strong { value: "bold".to_string() },
340    ///     ],
341    /// };
342    /// assert_eq!(block.to_plain_text(), "Overview and bold");
343    /// ```
344    #[must_use]
345    pub fn to_plain_text(&self) -> String {
346        match self {
347            Self::Heading { inline, .. } | Self::Paragraph { inline, .. } => {
348                inline.iter().map(InlineElement::to_plain_text).collect()
349            }
350            Self::Code { content, .. } => content.clone(),
351            Self::List { items, .. } => items
352                .iter()
353                .map(ListItem::to_plain_text)
354                .collect::<Vec<_>>()
355                .join("\n"),
356            Self::Blockquote { blocks, .. } => blocks
357                .iter()
358                .map(Self::to_plain_text)
359                .collect::<Vec<_>>()
360                .join("\n"),
361            Self::Table { headers, rows, .. } => {
362                let header_text = headers.join("\t");
363                let row_texts: Vec<String> = rows.iter().map(|row| row.join("\t")).collect();
364                if row_texts.is_empty() {
365                    header_text
366                } else {
367                    format!("{}\n{}", header_text, row_texts.join("\n"))
368                }
369            }
370            Self::Image { alt, .. } => alt.clone(),
371            Self::HorizontalRule => String::new(),
372            Self::Details {
373                summary, blocks, ..
374            } => {
375                let blocks_text: String = blocks
376                    .iter()
377                    .map(Self::to_plain_text)
378                    .collect::<Vec<_>>()
379                    .join("\n");
380                if blocks_text.is_empty() {
381                    summary.clone()
382                } else {
383                    format!("{}\n{}", summary, blocks_text)
384                }
385            }
386        }
387    }
388}
389
390/// An inline element within a block.
391///
392/// These represent inline formatting and links within text content.
393#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
394#[serde(tag = "type", rename_all = "lowercase")]
395pub enum InlineElement {
396    /// Plain text
397    Text { value: String },
398    /// Bold text (**text** or __text__)
399    Strong { value: String },
400    /// Italic text (*text* or _text_)
401    Emphasis { value: String },
402    /// Inline code (`code`)
403    Code { value: String },
404    /// A link [text](url)
405    Link {
406        text: String,
407        url: String,
408        title: Option<String>,
409        /// Relative line offset within parent block (for nested list items)
410        #[serde(default, skip_serializing_if = "Option::is_none")]
411        line_offset: Option<usize>,
412    },
413    /// An inline image ![alt](src)
414    Image {
415        alt: String,
416        src: String,
417        title: Option<String>,
418        /// Relative line offset within parent block (for nested list items)
419        #[serde(default, skip_serializing_if = "Option::is_none")]
420        line_offset: Option<usize>,
421    },
422    /// Strikethrough text (~~text~~)
423    Strikethrough { value: String },
424}
425
426impl InlineElement {
427    /// Extract plain text from this inline element.
428    ///
429    /// Returns only the visible text content, stripping markdown syntax.
430    /// For links, returns the link text (not the URL).
431    /// For images, returns the alt text.
432    ///
433    /// # Example
434    /// ```
435    /// use turbovault_core::InlineElement;
436    ///
437    /// let link = InlineElement::Link {
438    ///     text: "Overview".to_string(),
439    ///     url: "#overview".to_string(),
440    ///     title: None,
441    ///     line_offset: None,
442    /// };
443    /// assert_eq!(link.to_plain_text(), "Overview");
444    /// ```
445    #[must_use]
446    pub fn to_plain_text(&self) -> &str {
447        match self {
448            Self::Text { value }
449            | Self::Strong { value }
450            | Self::Emphasis { value }
451            | Self::Code { value }
452            | Self::Strikethrough { value } => value,
453            Self::Link { text, .. } => text,
454            Self::Image { alt, .. } => alt,
455        }
456    }
457}
458
459/// A list item with optional checkbox and nested content.
460#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
461pub struct ListItem {
462    /// For task lists: Some(true) = checked, Some(false) = unchecked, None = not a task
463    pub checked: Option<bool>,
464    /// Raw text content of the item
465    pub content: String,
466    /// Parsed inline elements
467    pub inline: Vec<InlineElement>,
468    /// Nested blocks (e.g., code blocks, sub-lists inside list items)
469    #[serde(default, skip_serializing_if = "Vec::is_empty")]
470    pub blocks: Vec<ContentBlock>,
471}
472
473impl ListItem {
474    /// Extract plain text from this list item.
475    ///
476    /// Returns the visible text content by joining inline elements.
477    /// Includes nested block content recursively.
478    ///
479    /// # Example
480    /// ```
481    /// use turbovault_core::{ListItem, InlineElement};
482    ///
483    /// let item = ListItem {
484    ///     checked: Some(false),
485    ///     content: "Todo item".to_string(),
486    ///     inline: vec![InlineElement::Text { value: "Todo item".to_string() }],
487    ///     blocks: vec![],
488    /// };
489    /// assert_eq!(item.to_plain_text(), "Todo item");
490    /// ```
491    #[must_use]
492    pub fn to_plain_text(&self) -> String {
493        let mut result = String::new();
494
495        // Extract text from inline elements
496        for elem in &self.inline {
497            result.push_str(elem.to_plain_text());
498        }
499
500        // Include nested blocks
501        for block in &self.blocks {
502            if !result.is_empty() && !result.ends_with('\n') {
503                result.push('\n');
504            }
505            result.push_str(&block.to_plain_text());
506        }
507
508        result
509    }
510}
511
512/// Table column alignment.
513#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
514#[serde(rename_all = "lowercase")]
515pub enum TableAlignment {
516    Left,
517    Center,
518    Right,
519    None,
520}
521
522/// YAML frontmatter
523#[derive(Debug, Clone, Serialize, Deserialize)]
524pub struct Frontmatter {
525    pub data: HashMap<String, serde_json::Value>,
526    pub position: SourcePosition,
527}
528
529impl Frontmatter {
530    /// Extract tags from frontmatter
531    pub fn tags(&self) -> Vec<String> {
532        match self.data.get("tags") {
533            Some(serde_json::Value::String(s)) => vec![s.clone()],
534            Some(serde_json::Value::Array(arr)) => arr
535                .iter()
536                .filter_map(|v| v.as_str().map(|s| s.to_string()))
537                .collect(),
538            _ => vec![],
539        }
540    }
541
542    /// Extract aliases from frontmatter
543    pub fn aliases(&self) -> Vec<String> {
544        match self.data.get("aliases") {
545            Some(serde_json::Value::String(s)) => vec![s.clone()],
546            Some(serde_json::Value::Array(arr)) => arr
547                .iter()
548                .filter_map(|v| v.as_str().map(|s| s.to_string()))
549                .collect(),
550            _ => vec![],
551        }
552    }
553}
554
555/// File metadata
556#[derive(Debug, Clone, Serialize, Deserialize)]
557pub struct FileMetadata {
558    pub path: PathBuf,
559    pub size: u64,
560    pub created_at: f64,
561    pub modified_at: f64,
562    pub checksum: String,
563    pub is_attachment: bool,
564}
565
566/// A complete vault file with parsed content
567#[derive(Debug, Clone, Serialize, Deserialize)]
568pub struct VaultFile {
569    pub path: PathBuf,
570    pub content: String,
571    pub metadata: FileMetadata,
572
573    // Parsed elements
574    pub frontmatter: Option<Frontmatter>,
575    pub headings: Vec<Heading>,
576    pub links: Vec<Link>,
577    pub backlinks: HashSet<Link>,
578    pub blocks: Vec<Block>,
579    pub tags: Vec<Tag>,
580    pub callouts: Vec<Callout>,
581    pub tasks: Vec<TaskItem>,
582
583    // Cache status
584    pub is_parsed: bool,
585    pub parse_error: Option<String>,
586    pub last_parsed: Option<f64>,
587}
588
589impl VaultFile {
590    /// Create a new vault file
591    pub fn new(path: PathBuf, content: String, metadata: FileMetadata) -> Self {
592        Self {
593            path,
594            content,
595            metadata,
596            frontmatter: None,
597            headings: vec![],
598            links: vec![],
599            backlinks: HashSet::new(),
600            blocks: vec![],
601            tags: vec![],
602            callouts: vec![],
603            tasks: vec![],
604            is_parsed: false,
605            parse_error: None,
606            last_parsed: None,
607        }
608    }
609
610    /// Get outgoing links
611    pub fn outgoing_links(&self) -> HashSet<&str> {
612        self.links
613            .iter()
614            .filter(|link| matches!(link.type_, LinkType::WikiLink | LinkType::Embed))
615            .map(|link| link.target.as_str())
616            .collect()
617    }
618
619    /// Get headings indexed by text
620    pub fn headings_by_text(&self) -> HashMap<&str, &Heading> {
621        self.headings.iter().map(|h| (h.text.as_str(), h)).collect()
622    }
623
624    /// Get blocks with IDs
625    pub fn blocks_with_ids(&self) -> HashMap<&str, &Block> {
626        self.blocks
627            .iter()
628            .filter_map(|b| b.block_id.as_deref().map(|id| (id, b)))
629            .collect()
630    }
631
632    /// Check if file contains a tag
633    pub fn has_tag(&self, tag: &str) -> bool {
634        if let Some(fm) = &self.frontmatter
635            && fm.tags().contains(&tag.to_string())
636        {
637            return true;
638        }
639
640        self.tags.iter().any(|t| t.name == tag)
641    }
642}
643
#[cfg(test)]
mod tests {
    use super::*;

    /// Three plain lines; line 2 starts at byte 7 and line 3 at byte 14.
    const THREE_LINES: &str = "Line 1\nLine 2\nLine 3";
    /// Three lines with a wikilink starting at byte offset 14 on line 2.
    const LINKED_DOC: &str = "Line 1\nLine 2 [[Link]] here\nLine 3";

    /// Assert every field of a position in one go.
    fn assert_position(pos: SourcePosition, line: usize, column: usize, offset: usize, length: usize) {
        assert_eq!(
            (pos.line, pos.column, pos.offset, pos.length),
            (line, column, offset, length)
        );
    }

    #[test]
    fn test_source_position() {
        assert_position(SourcePosition::new(5, 10, 100, 20), 5, 10, 100, 20);
    }

    #[test]
    fn test_frontmatter_tags() {
        let tag_values = vec!["rust", "mcp"]
            .into_iter()
            .map(|name| serde_json::Value::String(name.to_string()))
            .collect();

        let mut data = HashMap::new();
        data.insert("tags".to_string(), serde_json::Value::Array(tag_values));

        let fm = Frontmatter {
            data,
            position: SourcePosition::start(),
        };

        let tags = fm.tags();
        assert_eq!(tags.len(), 2);
        assert!(tags.contains(&"rust".to_string()));
    }

    #[test]
    fn test_line_index_single_line() {
        let index = LineIndex::new("Hello, world!");

        assert_eq!(index.line_count(), 1);
        assert_eq!(index.line_col(0), (1, 1)); // 'H'
        assert_eq!(index.line_col(7), (1, 8)); // 'w'
    }

    #[test]
    fn test_line_index_multiline() {
        let index = LineIndex::new(THREE_LINES);
        assert_eq!(index.line_count(), 3);

        // (byte offset, expected (line, column))
        let cases = [
            (0, (1, 1)),  // 'L' of Line 1
            (5, (1, 6)),  // '1'
            (7, (2, 1)),  // 'L' of Line 2, first byte after the newline
            (13, (2, 7)), // the newline ending line 2
            (14, (3, 1)), // 'L' of Line 3, first byte after the second newline
        ];
        for &(offset, expected) in &cases {
            assert_eq!(index.line_col(offset), expected, "offset {}", offset);
        }
    }

    #[test]
    fn test_line_index_line_start() {
        let index = LineIndex::new(THREE_LINES);

        // (1-based line, expected start offset)
        let cases = [
            (1, Some(0)),
            (2, Some(7)),
            (3, Some(14)),
            (0, None), // invalid line number
            (4, None), // beyond the content
        ];
        for &(line, expected) in &cases {
            assert_eq!(index.line_start(line), expected, "line {}", line);
        }
    }

    #[test]
    fn test_source_position_from_offset() {
        // "Line 2 " is 7 bytes, so the link at offset 14 sits in column 8 of line 2.
        let pos = SourcePosition::from_offset(LINKED_DOC, 14, 8);
        assert_position(pos, 2, 8, 14, 8);
    }

    #[test]
    fn test_source_position_from_offset_indexed() {
        // Same lookup as the linear variant, routed through the index.
        let index = LineIndex::new(LINKED_DOC);
        let pos = SourcePosition::from_offset_indexed(&index, 14, 8);
        assert_position(pos, 2, 8, 14, 8);
    }

    #[test]
    fn test_source_position_first_line() {
        let pos = SourcePosition::from_offset("[[Link]] at start", 0, 8);
        assert_eq!((pos.line, pos.column), (1, 1));
    }

    #[test]
    fn test_line_index_empty_content() {
        let index = LineIndex::new("");

        // Even empty content has a "line 1".
        assert_eq!(index.line_count(), 1);
        assert_eq!(index.line_col(0), (1, 1));
    }

    #[test]
    fn test_line_index_trailing_newline() {
        let index = LineIndex::new("Line 1\n");

        // Line 1 plus the empty line 2 that follows the newline.
        assert_eq!(index.line_count(), 2);
        assert_eq!(index.line_col(6), (1, 7)); // the newline itself
        assert_eq!(index.line_col(7), (2, 1)); // just after the newline
    }
}
769        assert_eq!(index.line_col(7), (2, 1)); // After newline
770    }
771}