turbovault_core/
models.rs

1//! Core data models representing Obsidian vault elements.
2//!
3//! These types are designed to be:
4//! - **Serializable**: All types derive Serialize/Deserialize
5//! - **Debuggable**: Derive Debug for easy inspection
6//! - **Cloneable**: `Arc<T>` friendly for shared ownership
7//! - **Type-Safe**: Enums replace magic strings
8//!
9//! The types roughly correspond to Python dataclasses in the reference implementation.
10
11use serde::{Deserialize, Serialize};
12use std::collections::{HashMap, HashSet};
13use std::path::PathBuf;
14
15/// Position in source text (line, column, byte offset)
16#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
17pub struct SourcePosition {
18    pub line: usize,
19    pub column: usize,
20    pub offset: usize,
21    pub length: usize,
22}
23
24impl SourcePosition {
25    /// Create a new source position
26    pub fn new(line: usize, column: usize, offset: usize, length: usize) -> Self {
27        Self {
28            line,
29            column,
30            offset,
31            length,
32        }
33    }
34
35    /// Create position at start
36    pub fn start() -> Self {
37        Self {
38            line: 0,
39            column: 0,
40            offset: 0,
41            length: 0,
42        }
43    }
44
45    /// Create position from byte offset by computing line and column.
46    ///
47    /// This is O(n) where n is the offset - suitable for single-use cases.
48    /// For bulk operations, use `from_offset_indexed` with a pre-computed `LineIndex`.
49    ///
50    /// Line numbers start at 1, column numbers start at 1.
51    pub fn from_offset(content: &str, offset: usize, length: usize) -> Self {
52        let before = &content[..offset.min(content.len())];
53        let line = before.matches('\n').count() + 1;
54        let column = before
55            .rfind('\n')
56            .map(|pos| offset - pos)
57            .unwrap_or(offset + 1);
58
59        Self {
60            line,
61            column,
62            offset,
63            length,
64        }
65    }
66
67    /// Create position from byte offset using a pre-computed line index.
68    ///
69    /// This is O(log n) - use for bulk parsing operations.
70    pub fn from_offset_indexed(index: &LineIndex, offset: usize, length: usize) -> Self {
71        let (line, column) = index.line_col(offset);
72        Self {
73            line,
74            column,
75            offset,
76            length,
77        }
78    }
79}
80
/// Table of line-start byte offsets for fast line/column lookup.
///
/// Build it once per document with [`LineIndex::new`] and reuse it for every
/// position lookup; each lookup is then a binary search instead of a scan.
///
/// # Example
/// ```
/// use turbovault_core::{LineIndex, SourcePosition};
///
/// let content = "Line 1\nLine 2\nLine 3";
/// let index = LineIndex::new(content);
///
/// // O(log n) lookup instead of O(n)
/// let pos = SourcePosition::from_offset_indexed(&index, 7, 6);
/// assert_eq!(pos.line, 2);
/// assert_eq!(pos.column, 1);
/// ```
#[derive(Debug, Clone)]
pub struct LineIndex {
    /// Byte offset of the first byte of each line; entry 0 belongs to line 1.
    line_starts: Vec<usize>,
}

impl LineIndex {
    /// Scan `content` once and record where every line begins.
    ///
    /// Line 1 always starts at offset 0; each `'\n'` opens a new line at the
    /// byte that follows it.
    pub fn new(content: &str) -> Self {
        let line_starts = std::iter::once(0)
            .chain(
                content
                    .match_indices('\n')
                    .map(|(newline_at, _)| newline_at + 1),
            )
            .collect();
        Self { line_starts }
    }

    /// Translate a byte offset into a 1-based `(line, column)` pair.
    ///
    /// The lookup is a binary search over the recorded line starts. The
    /// column is the byte distance from the start of the line, plus one.
    pub fn line_col(&self, offset: usize) -> (usize, usize) {
        // An exact hit means `offset` is itself a line start; otherwise the
        // containing line is the one just before the insertion point.
        let line_idx = match self.line_starts.binary_search(&offset) {
            Ok(exact) => exact,
            Err(insert_at) => insert_at.saturating_sub(1),
        };
        let line_start = self.line_starts.get(line_idx).copied().unwrap_or(0);
        (line_idx + 1, offset - line_start + 1)
    }

    /// Byte offset at which the given 1-based `line` starts.
    ///
    /// Returns `None` for line 0 and for lines beyond the indexed content.
    pub fn line_start(&self, line: usize) -> Option<usize> {
        let idx = line.checked_sub(1)?;
        self.line_starts.get(idx).copied()
    }

    /// Number of lines recorded in the index (always at least 1 for an index
    /// built with `new`).
    pub fn line_count(&self) -> usize {
        self.line_starts.len()
    }
}
145
146/// Type of link in Obsidian content
147#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
148pub enum LinkType {
149    /// Wikilink: `[[Note]]`
150    WikiLink,
151    /// Embedded note: `![[Note]]`
152    Embed,
153    /// Block reference: `[[Note#^block]]`
154    BlockRef,
155    /// Heading reference: `[[Note#Heading]]` or `file.md#section`
156    HeadingRef,
157    /// Same-document anchor: `#section` (no file reference)
158    Anchor,
159    /// Markdown link: `[text](url)` to relative file
160    MarkdownLink,
161    /// External URL: `http://...`, `https://...`, `mailto:...`
162    ExternalLink,
163}
164
165/// A link in vault content
166#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
167pub struct Link {
168    pub type_: LinkType,
169    pub source_file: PathBuf,
170    pub target: String,
171    pub display_text: Option<String>,
172    pub position: SourcePosition,
173    pub resolved_target: Option<PathBuf>,
174    pub is_valid: bool,
175}
176
177impl Link {
178    /// Create a new link
179    pub fn new(
180        type_: LinkType,
181        source_file: PathBuf,
182        target: String,
183        position: SourcePosition,
184    ) -> Self {
185        Self {
186            type_,
187            source_file,
188            target,
189            display_text: None,
190            position,
191            resolved_target: None,
192            is_valid: true,
193        }
194    }
195}
196
197/// A heading in vault content
198#[derive(Debug, Clone, Serialize, Deserialize)]
199pub struct Heading {
200    pub text: String,
201    pub level: u8, // 1-6
202    pub position: SourcePosition,
203    pub anchor: Option<String>,
204}
205
206/// A tag in vault content
207#[derive(Debug, Clone, Serialize, Deserialize)]
208pub struct Tag {
209    pub name: String,
210    pub position: SourcePosition,
211    pub is_nested: bool, // #parent/child
212}
213
214/// A task item in vault content
215#[derive(Debug, Clone, Serialize, Deserialize)]
216pub struct TaskItem {
217    pub content: String,
218    pub is_completed: bool,
219    pub position: SourcePosition,
220    pub due_date: Option<String>,
221}
222
223/// Type of callout block
224#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
225pub enum CalloutType {
226    Note,
227    Tip,
228    Info,
229    Todo,
230    Important,
231    Success,
232    Question,
233    Warning,
234    Failure,
235    Danger,
236    Bug,
237    Example,
238    Quote,
239}
240
241/// A callout block in vault content
242#[derive(Debug, Clone, Serialize, Deserialize)]
243pub struct Callout {
244    pub type_: CalloutType,
245    pub title: Option<String>,
246    pub content: String,
247    pub position: SourcePosition,
248    pub is_foldable: bool,
249}
250
251/// A block in vault content (Obsidian block reference with ^id)
252#[derive(Debug, Clone, Serialize, Deserialize)]
253pub struct Block {
254    pub content: String,
255    pub block_id: Option<String>,
256    pub position: SourcePosition,
257    pub type_: String, // paragraph, heading, list_item, etc.
258}
259
260// ============================================================================
261// Content Block Types (for full markdown parsing)
262// ============================================================================
263
264/// A parsed content block in a markdown document.
265///
266/// These represent the block-level structure of markdown content,
267/// similar to an AST but optimized for consumption by tools like treemd.
268#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
269#[serde(tag = "type", rename_all = "lowercase")]
270pub enum ContentBlock {
271    /// A heading (# H1, ## H2, etc.)
272    Heading {
273        level: usize,
274        content: String,
275        inline: Vec<InlineElement>,
276        anchor: Option<String>,
277    },
278    /// A paragraph of text
279    Paragraph {
280        content: String,
281        inline: Vec<InlineElement>,
282    },
283    /// A fenced or indented code block
284    Code {
285        language: Option<String>,
286        content: String,
287        start_line: usize,
288        end_line: usize,
289    },
290    /// An ordered or unordered list
291    List { ordered: bool, items: Vec<ListItem> },
292    /// A blockquote (> text)
293    Blockquote {
294        content: String,
295        blocks: Vec<ContentBlock>,
296    },
297    /// A table with headers and rows
298    Table {
299        headers: Vec<String>,
300        alignments: Vec<TableAlignment>,
301        rows: Vec<Vec<String>>,
302    },
303    /// An image (standalone, not inline)
304    Image {
305        alt: String,
306        src: String,
307        title: Option<String>,
308    },
309    /// A horizontal rule (---, ***, ___)
310    HorizontalRule,
311    /// HTML <details><summary> block
312    Details {
313        summary: String,
314        content: String,
315        blocks: Vec<ContentBlock>,
316    },
317}
318
319impl ContentBlock {
320    /// Extract plain text from this content block.
321    ///
322    /// Returns only the visible text content, stripping markdown syntax.
323    /// This is useful for search indexing, accessibility, and accurate word counts.
324    ///
325    /// # Example
326    /// ```
327    /// use turbovault_core::{ContentBlock, InlineElement};
328    ///
329    /// let block = ContentBlock::Paragraph {
330    ///     content: "[Overview](#overview) and **bold**".to_string(),
331    ///     inline: vec![
332    ///         InlineElement::Link {
333    ///             text: "Overview".to_string(),
334    ///             url: "#overview".to_string(),
335    ///             title: None,
336    ///         },
337    ///         InlineElement::Text { value: " and ".to_string() },
338    ///         InlineElement::Strong { value: "bold".to_string() },
339    ///     ],
340    /// };
341    /// assert_eq!(block.to_plain_text(), "Overview and bold");
342    /// ```
343    #[must_use]
344    pub fn to_plain_text(&self) -> String {
345        match self {
346            Self::Heading { inline, .. } | Self::Paragraph { inline, .. } => {
347                inline.iter().map(InlineElement::to_plain_text).collect()
348            }
349            Self::Code { content, .. } => content.clone(),
350            Self::List { items, .. } => items
351                .iter()
352                .map(ListItem::to_plain_text)
353                .collect::<Vec<_>>()
354                .join("\n"),
355            Self::Blockquote { blocks, .. } => blocks
356                .iter()
357                .map(Self::to_plain_text)
358                .collect::<Vec<_>>()
359                .join("\n"),
360            Self::Table { headers, rows, .. } => {
361                let header_text = headers.join("\t");
362                let row_texts: Vec<String> = rows.iter().map(|row| row.join("\t")).collect();
363                if row_texts.is_empty() {
364                    header_text
365                } else {
366                    format!("{}\n{}", header_text, row_texts.join("\n"))
367                }
368            }
369            Self::Image { alt, .. } => alt.clone(),
370            Self::HorizontalRule => String::new(),
371            Self::Details {
372                summary, blocks, ..
373            } => {
374                let blocks_text: String = blocks
375                    .iter()
376                    .map(Self::to_plain_text)
377                    .collect::<Vec<_>>()
378                    .join("\n");
379                if blocks_text.is_empty() {
380                    summary.clone()
381                } else {
382                    format!("{}\n{}", summary, blocks_text)
383                }
384            }
385        }
386    }
387}
388
389/// An inline element within a block.
390///
391/// These represent inline formatting and links within text content.
392#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
393#[serde(tag = "type", rename_all = "lowercase")]
394pub enum InlineElement {
395    /// Plain text
396    Text { value: String },
397    /// Bold text (**text** or __text__)
398    Strong { value: String },
399    /// Italic text (*text* or _text_)
400    Emphasis { value: String },
401    /// Inline code (`code`)
402    Code { value: String },
403    /// A link [text](url)
404    Link {
405        text: String,
406        url: String,
407        title: Option<String>,
408    },
409    /// An inline image ![alt](src)
410    Image {
411        alt: String,
412        src: String,
413        title: Option<String>,
414    },
415    /// Strikethrough text (~~text~~)
416    Strikethrough { value: String },
417}
418
419impl InlineElement {
420    /// Extract plain text from this inline element.
421    ///
422    /// Returns only the visible text content, stripping markdown syntax.
423    /// For links, returns the link text (not the URL).
424    /// For images, returns the alt text.
425    ///
426    /// # Example
427    /// ```
428    /// use turbovault_core::InlineElement;
429    ///
430    /// let link = InlineElement::Link {
431    ///     text: "Overview".to_string(),
432    ///     url: "#overview".to_string(),
433    ///     title: None,
434    /// };
435    /// assert_eq!(link.to_plain_text(), "Overview");
436    /// ```
437    #[must_use]
438    pub fn to_plain_text(&self) -> &str {
439        match self {
440            Self::Text { value }
441            | Self::Strong { value }
442            | Self::Emphasis { value }
443            | Self::Code { value }
444            | Self::Strikethrough { value } => value,
445            Self::Link { text, .. } => text,
446            Self::Image { alt, .. } => alt,
447        }
448    }
449}
450
451/// A list item with optional checkbox and nested content.
452#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
453pub struct ListItem {
454    /// For task lists: Some(true) = checked, Some(false) = unchecked, None = not a task
455    pub checked: Option<bool>,
456    /// Raw text content of the item
457    pub content: String,
458    /// Parsed inline elements
459    pub inline: Vec<InlineElement>,
460    /// Nested blocks (e.g., code blocks, sub-lists inside list items)
461    #[serde(default, skip_serializing_if = "Vec::is_empty")]
462    pub blocks: Vec<ContentBlock>,
463}
464
465impl ListItem {
466    /// Extract plain text from this list item.
467    ///
468    /// Returns the visible text content by joining inline elements.
469    /// Includes nested block content recursively.
470    ///
471    /// # Example
472    /// ```
473    /// use turbovault_core::{ListItem, InlineElement};
474    ///
475    /// let item = ListItem {
476    ///     checked: Some(false),
477    ///     content: "Todo item".to_string(),
478    ///     inline: vec![InlineElement::Text { value: "Todo item".to_string() }],
479    ///     blocks: vec![],
480    /// };
481    /// assert_eq!(item.to_plain_text(), "Todo item");
482    /// ```
483    #[must_use]
484    pub fn to_plain_text(&self) -> String {
485        let mut result = String::new();
486
487        // Extract text from inline elements
488        for elem in &self.inline {
489            result.push_str(elem.to_plain_text());
490        }
491
492        // Include nested blocks
493        for block in &self.blocks {
494            if !result.is_empty() && !result.ends_with('\n') {
495                result.push('\n');
496            }
497            result.push_str(&block.to_plain_text());
498        }
499
500        result
501    }
502}
503
504/// Table column alignment.
505#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
506#[serde(rename_all = "lowercase")]
507pub enum TableAlignment {
508    Left,
509    Center,
510    Right,
511    None,
512}
513
514/// YAML frontmatter
515#[derive(Debug, Clone, Serialize, Deserialize)]
516pub struct Frontmatter {
517    pub data: HashMap<String, serde_json::Value>,
518    pub position: SourcePosition,
519}
520
521impl Frontmatter {
522    /// Extract tags from frontmatter
523    pub fn tags(&self) -> Vec<String> {
524        match self.data.get("tags") {
525            Some(serde_json::Value::String(s)) => vec![s.clone()],
526            Some(serde_json::Value::Array(arr)) => arr
527                .iter()
528                .filter_map(|v| v.as_str().map(|s| s.to_string()))
529                .collect(),
530            _ => vec![],
531        }
532    }
533
534    /// Extract aliases from frontmatter
535    pub fn aliases(&self) -> Vec<String> {
536        match self.data.get("aliases") {
537            Some(serde_json::Value::String(s)) => vec![s.clone()],
538            Some(serde_json::Value::Array(arr)) => arr
539                .iter()
540                .filter_map(|v| v.as_str().map(|s| s.to_string()))
541                .collect(),
542            _ => vec![],
543        }
544    }
545}
546
547/// File metadata
548#[derive(Debug, Clone, Serialize, Deserialize)]
549pub struct FileMetadata {
550    pub path: PathBuf,
551    pub size: u64,
552    pub created_at: f64,
553    pub modified_at: f64,
554    pub checksum: String,
555    pub is_attachment: bool,
556}
557
558/// A complete vault file with parsed content
559#[derive(Debug, Clone, Serialize, Deserialize)]
560pub struct VaultFile {
561    pub path: PathBuf,
562    pub content: String,
563    pub metadata: FileMetadata,
564
565    // Parsed elements
566    pub frontmatter: Option<Frontmatter>,
567    pub headings: Vec<Heading>,
568    pub links: Vec<Link>,
569    pub backlinks: HashSet<Link>,
570    pub blocks: Vec<Block>,
571    pub tags: Vec<Tag>,
572    pub callouts: Vec<Callout>,
573    pub tasks: Vec<TaskItem>,
574
575    // Cache status
576    pub is_parsed: bool,
577    pub parse_error: Option<String>,
578    pub last_parsed: Option<f64>,
579}
580
581impl VaultFile {
582    /// Create a new vault file
583    pub fn new(path: PathBuf, content: String, metadata: FileMetadata) -> Self {
584        Self {
585            path,
586            content,
587            metadata,
588            frontmatter: None,
589            headings: vec![],
590            links: vec![],
591            backlinks: HashSet::new(),
592            blocks: vec![],
593            tags: vec![],
594            callouts: vec![],
595            tasks: vec![],
596            is_parsed: false,
597            parse_error: None,
598            last_parsed: None,
599        }
600    }
601
602    /// Get outgoing links
603    pub fn outgoing_links(&self) -> HashSet<&str> {
604        self.links
605            .iter()
606            .filter(|link| matches!(link.type_, LinkType::WikiLink | LinkType::Embed))
607            .map(|link| link.target.as_str())
608            .collect()
609    }
610
611    /// Get headings indexed by text
612    pub fn headings_by_text(&self) -> HashMap<&str, &Heading> {
613        self.headings.iter().map(|h| (h.text.as_str(), h)).collect()
614    }
615
616    /// Get blocks with IDs
617    pub fn blocks_with_ids(&self) -> HashMap<&str, &Block> {
618        self.blocks
619            .iter()
620            .filter_map(|b| b.block_id.as_deref().map(|id| (id, b)))
621            .collect()
622    }
623
624    /// Check if file contains a tag
625    pub fn has_tag(&self, tag: &str) -> bool {
626        if let Some(fm) = &self.frontmatter
627            && fm.tags().contains(&tag.to_string())
628        {
629            return true;
630        }
631
632        self.tags.iter().any(|t| t.name == tag)
633    }
634}
635
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-line document shared by the `LineIndex` tests.
    const THREE_LINES: &str = "Line 1\nLine 2\nLine 3";
    /// Document whose `[[Link]]` starts at byte offset 14 (line 2, column 8).
    const DOC_WITH_LINK: &str = "Line 1\nLine 2 [[Link]] here\nLine 3";

    #[test]
    fn test_source_position() {
        let SourcePosition {
            line,
            column,
            offset,
            length,
        } = SourcePosition::new(5, 10, 100, 20);
        assert_eq!(line, 5);
        assert_eq!(column, 10);
        assert_eq!(offset, 100);
        assert_eq!(length, 20);
    }

    #[test]
    fn test_frontmatter_tags() {
        let tag_values = ["rust", "mcp"]
            .into_iter()
            .map(|name| serde_json::Value::String(name.to_string()))
            .collect();
        let fm = Frontmatter {
            data: HashMap::from([("tags".to_string(), serde_json::Value::Array(tag_values))]),
            position: SourcePosition::start(),
        };

        let tags = fm.tags();
        assert_eq!(tags.len(), 2);
        assert!(tags.contains(&"rust".to_string()));
    }

    #[test]
    fn test_line_index_single_line() {
        let index = LineIndex::new("Hello, world!");

        assert_eq!(index.line_count(), 1);
        assert_eq!(index.line_col(0), (1, 1)); // 'H'
        assert_eq!(index.line_col(7), (1, 8)); // 'w'
    }

    #[test]
    fn test_line_index_multiline() {
        let index = LineIndex::new(THREE_LINES);
        assert_eq!(index.line_count(), 3);

        // (byte offset, expected (line, column))
        let cases = [
            (0, (1, 1)),  // 'L' of Line 1
            (5, (1, 6)),  // '1'
            (7, (2, 1)),  // 'L' of Line 2, first byte after the newline
            (13, (2, 7)), // second newline, still counted on line 2
            (14, (3, 1)), // 'L' of Line 3, first byte after the second newline
        ];
        for (offset, expected) in cases {
            assert_eq!(index.line_col(offset), expected);
        }
    }

    #[test]
    fn test_line_index_line_start() {
        let index = LineIndex::new(THREE_LINES);

        let cases = [
            (1, Some(0)),
            (2, Some(7)),
            (3, Some(14)),
            (0, None), // Invalid line
            (4, None), // Beyond content
        ];
        for (line, expected) in cases {
            assert_eq!(index.line_start(line), expected);
        }
    }

    #[test]
    fn test_source_position_from_offset() {
        // [[Link]] starts at offset 14; "Line 2 " is 7 chars, so column 8.
        let pos = SourcePosition::from_offset(DOC_WITH_LINK, 14, 8);
        assert_eq!(pos.line, 2);
        assert_eq!(pos.column, 8);
        assert_eq!(pos.offset, 14);
        assert_eq!(pos.length, 8);
    }

    #[test]
    fn test_source_position_from_offset_indexed() {
        // Same expectations as the unindexed lookup, via a `LineIndex`.
        let index = LineIndex::new(DOC_WITH_LINK);
        let pos = SourcePosition::from_offset_indexed(&index, 14, 8);
        assert_eq!(pos.line, 2);
        assert_eq!(pos.column, 8);
        assert_eq!(pos.offset, 14);
        assert_eq!(pos.length, 8);
    }

    #[test]
    fn test_source_position_first_line() {
        let pos = SourcePosition::from_offset("[[Link]] at start", 0, 8);
        assert_eq!(pos.line, 1);
        assert_eq!(pos.column, 1);
    }

    #[test]
    fn test_line_index_empty_content() {
        let index = LineIndex::new("");

        assert_eq!(index.line_count(), 1); // Even empty content has "line 1"
        assert_eq!(index.line_col(0), (1, 1));
    }

    #[test]
    fn test_line_index_trailing_newline() {
        let index = LineIndex::new("Line 1\n");

        assert_eq!(index.line_count(), 2); // Line 1 + empty line 2
        assert_eq!(index.line_col(6), (1, 7)); // The newline itself
        assert_eq!(index.line_col(7), (2, 1)); // After newline
    }
}