1use std::collections::HashMap;
13use std::fs;
14use std::path::{Path, PathBuf};
15
16use lexa_core::{
17 EmbeddingConfig, IndexStats, LexaDb, LexaError, PreprocessOutput, Preprocessor, SearchOptions,
18 SearchTier, Transaction,
19};
20use rusqlite::{params, OptionalExtension};
21use serde::{Deserialize, Serialize};
22
23use crate::frontmatter::{self, Frontmatter};
24use crate::tags;
25use crate::wikilinks::{self, LinkKind, Wikilink};
26use crate::{schema, Result};
27
/// Obsidian-aware wrapper around a [`LexaDb`] index for a single vault.
pub struct LexaObsidianDb {
    /// Underlying index database; the SQL issued by this type goes through `inner.conn()`.
    inner: LexaDb,
    /// Vault directory given to `open`; it is handed to the indexer and used
    /// as the base for relative note arguments.
    vault_root: PathBuf,
}
33
/// Summary returned by `LexaObsidianDb::index_vault`.
#[derive(Debug, Clone, Serialize)]
pub struct IndexReport {
    /// Number of `.md` documents present in the index after orphan purging.
    pub notes_seen: usize,
    /// Count returned by the underlying indexer for this run.
    pub notes_indexed: usize,
    /// Number of indexed `.md` documents removed because their file no longer exists.
    pub notes_deleted: usize,
    /// Total wikilinks written for the notes processed in this run.
    pub links: usize,
    /// Total tags written for the notes processed in this run.
    pub tags: usize,
    /// Total block-id rows recorded while refreshing blocks.
    pub blocks: usize,
}
47
48#[derive(Debug, Clone, Serialize, Deserialize, Default)]
49pub struct SearchNotesOptions {
50 pub query: String,
51 #[serde(default)]
52 pub tier: SearchTier,
53 #[serde(default = "default_limit")]
54 pub limit: usize,
55 #[serde(default)]
56 pub tags: Vec<String>,
57 #[serde(default)]
59 pub folders: Vec<String>,
60 #[serde(default)]
61 pub additional_queries: Vec<String>,
62}
63
64fn default_limit() -> usize {
65 10
66}
67
/// One search result produced by `search_notes` / `get_similar`.
#[derive(Debug, Clone, Serialize)]
pub struct NoteHit {
    /// Document path as reported by the underlying search hit.
    pub path: String,
    /// Title from `note_metadata`, or the file stem of `path` when no title row exists.
    pub title: String,
    /// Score copied from the underlying search hit.
    pub score: f32,
    /// Excerpt copied from the underlying search hit.
    pub excerpt: String,
    /// Heading copied from the underlying search hit, if any.
    pub heading: Option<String>,
    /// Start line copied from the underlying search hit.
    pub line_start: i64,
    /// End line copied from the underlying search hit.
    pub line_end: i64,
    /// Tags stored for the note in `note_tags`, sorted by tag.
    pub tags: Vec<String>,
    /// Tier breakdown copied from the underlying search hit.
    pub breakdown: lexa_core::TierBreakdown,
}
80
/// A link pointing at a note, as returned by `find_backlinks`.
#[derive(Debug, Clone, Serialize)]
pub struct Backlink {
    /// Path of the document that contains the link.
    pub src_path: String,
    /// Title of the linking document from `note_metadata`; `None` when it has no metadata row.
    pub src_title: Option<String>,
    /// `alias` column of the `note_links` row.
    pub alias: Option<String>,
    /// `header` column of the `note_links` row.
    pub header: Option<String>,
    /// `block_id` column of the `note_links` row.
    pub block_id: Option<String>,
    /// `kind` column of the `note_links` row (written from `LinkKind::as_str`).
    pub kind: String,
}
90
/// A tag together with how many `note_tags` rows carry it, as returned by `list_tags`.
#[derive(Debug, Clone, Serialize)]
pub struct TagCount {
    /// Tag text as stored in `note_tags`.
    pub tag: String,
    /// `COUNT(*)` of `note_tags` rows for this tag.
    pub count: i64,
}
96
/// An outgoing link of a note, read from `note_links` by `get_note`.
#[derive(Debug, Clone, Serialize)]
pub struct LinkRef {
    /// Link target name as stored in `note_links`.
    pub target_name: String,
    /// Resolved document path; rows are inserted with NULL here and filled in
    /// later by `resolve_pending_links` when a matching note exists.
    pub target_path: Option<String>,
    /// `header` column of the link row.
    pub header: Option<String>,
    /// `block_id` column of the link row.
    pub block_id: Option<String>,
    /// `alias` column of the link row.
    pub alias: Option<String>,
    /// `kind` column of the link row (written from `LinkKind::as_str`).
    pub kind: String,
}
106
/// A note assembled by `get_note` from the file on disk plus index data.
#[derive(Debug, Clone, Serialize)]
pub struct Note {
    /// Resolved path of the note file.
    pub path: String,
    /// Title computed by `frontmatter::resolve_title` from the file contents.
    pub title: String,
    /// Frontmatter converted to JSON by `frontmatter_to_json`.
    pub frontmatter: serde_json::Value,
    /// Body after frontmatter, or just the requested block when one was asked for and found.
    pub body: String,
    /// Tags stored for the note in `note_tags`.
    pub tags: Vec<String>,
    /// Links whose source is this note.
    pub outgoing: Vec<LinkRef>,
    /// Links from other documents that point at this note.
    pub incoming: Vec<Backlink>,
}
117
/// Snapshot returned by `vault_status`.
#[derive(Debug, Clone, Serialize)]
pub struct VaultStatus {
    /// Statistics reported by the underlying `LexaDb`.
    pub stats: IndexStats,
    /// Vault root this handle was opened with.
    pub vault_root: PathBuf,
    /// Number of indexed documents whose path ends in `.md` (case-insensitive).
    pub note_count: i64,
    /// Number of distinct tags in `note_tags`.
    pub tag_count: i64,
    /// Number of rows in `note_links`.
    pub link_count: i64,
    /// `true` when `note_count` is zero.
    pub needs_index: bool,
}
127
128impl LexaObsidianDb {
129 pub fn open(
133 db_path: impl AsRef<Path>,
134 vault_root: impl AsRef<Path>,
135 embedding_config: EmbeddingConfig,
136 ) -> Result<Self> {
137 let inner = LexaDb::open(db_path, embedding_config)?;
138 schema::migrate(inner.conn())?;
139 Ok(Self {
140 inner,
141 vault_root: vault_root.as_ref().to_path_buf(),
142 })
143 }
144
145 pub fn vault_root(&self) -> &Path {
146 &self.vault_root
147 }
148
149 pub fn inner(&self) -> &LexaDb {
150 &self.inner
151 }
152
153 pub fn index_vault(&mut self) -> Result<IndexReport> {
160 let mut report = IndexReport {
161 notes_seen: 0,
162 notes_indexed: 0,
163 notes_deleted: 0,
164 links: 0,
165 tags: 0,
166 blocks: 0,
167 };
168
169 let preprocessor = ObsidianPreprocessor;
170
171 let report_links = std::cell::Cell::new(0usize);
172 let report_tags = std::cell::Cell::new(0usize);
173
174 let indexed = self.inner.index_path_with_preprocessor::<NoteSidecar>(
175 &self.vault_root,
176 Some(&preprocessor),
177 |tx, doc_id, payload| {
178 if !payload.is_obsidian_note {
179 return Ok(());
180 }
181 write_metadata_tx(tx, doc_id, &payload.title, &payload.frontmatter)?;
182 replace_tags_tx(tx, doc_id, &payload.tags)?;
183 replace_links_tx(tx, doc_id, &payload.links)?;
184 report_tags.set(report_tags.get() + payload.tags.len());
185 report_links.set(report_links.get() + payload.links.len());
186 Ok(())
187 },
188 )?;
189 report.notes_indexed = indexed;
190 report.tags = report_tags.get();
191 report.links = report_links.get();
192
193 report.notes_deleted = self.purge_orphans()?;
197
198 let docs = self.markdown_documents()?;
200 report.notes_seen = docs.len();
201 for (doc_id, _abs_path) in &docs {
202 report.blocks += self.refresh_blocks(*doc_id)?;
203 }
204
205 self.resolve_pending_links()?;
208
209 Ok(report)
210 }
211
212 fn purge_orphans(&self) -> Result<usize> {
217 let docs = self.markdown_documents()?;
218 let mut orphan_ids = Vec::new();
219 for (doc_id, path) in docs {
220 if !path.exists() {
221 orphan_ids.push(doc_id);
222 }
223 }
224 if orphan_ids.is_empty() {
225 return Ok(0);
226 }
227 let conn = self.inner.conn();
228 for id in &orphan_ids {
229 conn.execute("DELETE FROM documents WHERE id = ?1", params![id])?;
230 }
231 Ok(orphan_ids.len())
232 }
233
234 pub fn search_notes(&self, opts: &SearchNotesOptions) -> Result<Vec<NoteHit>> {
236 let hits = self.inner.search(&SearchOptions {
237 query: opts.query.clone(),
238 tier: opts.tier,
239 limit: opts.limit.saturating_mul(2).max(opts.limit),
240 additional_queries: opts.additional_queries.clone(),
241 })?;
242 let mut out = Vec::with_capacity(hits.len());
243 for hit in hits {
244 if !self.path_passes_folder_filter(&hit.path, &opts.folders) {
245 continue;
246 }
247 let doc_id = match self.lookup_doc_id(&hit.path)? {
248 Some(id) => id,
249 None => continue,
250 };
251 let tags = self.tags_for_doc(doc_id)?;
252 if !opts.tags.is_empty() {
253 let note_tags: std::collections::HashSet<&String> = tags.iter().collect();
254 if !opts.tags.iter().any(|t| note_tags.contains(t)) {
255 continue;
256 }
257 }
258 let title = self
259 .title_for_doc(doc_id)?
260 .unwrap_or_else(|| file_stem_of(&hit.path));
261 out.push(NoteHit {
262 path: hit.path.clone(),
263 title,
264 score: hit.score,
265 excerpt: hit.excerpt.clone(),
266 heading: hit.heading.clone(),
267 line_start: hit.line_start,
268 line_end: hit.line_end,
269 tags,
270 breakdown: hit.breakdown.clone(),
271 });
272 if out.len() >= opts.limit {
273 break;
274 }
275 }
276 Ok(out)
277 }
278
279 pub fn find_backlinks(&self, note: &str) -> Result<Vec<Backlink>> {
280 let conn = self.inner.conn();
281 let resolved = self.resolve_note_argument(note)?;
282
283 let mut stmt = conn.prepare(
284 "SELECT
285 d.path,
286 m.title,
287 nl.alias,
288 nl.header,
289 nl.block_id,
290 nl.kind
291 FROM note_links nl
292 JOIN documents d ON d.id = nl.src_doc_id
293 LEFT JOIN note_metadata m ON m.doc_id = d.id
294 WHERE nl.target_path = ?1 OR LOWER(nl.target_name) = LOWER(?2)
295 ORDER BY d.path",
296 )?;
297
298 let rows = stmt.query_map(
299 params![resolved.path.as_deref(), resolved.name.as_str()],
300 |row| {
301 Ok(Backlink {
302 src_path: row.get(0)?,
303 src_title: row.get::<_, Option<String>>(1)?,
304 alias: row.get::<_, Option<String>>(2)?,
305 header: row.get::<_, Option<String>>(3)?,
306 block_id: row.get::<_, Option<String>>(4)?,
307 kind: row.get(5)?,
308 })
309 },
310 )?;
311 rows.collect::<std::result::Result<Vec<_>, _>>()
312 .map_err(LexaError::from)
313 }
314
315 pub fn list_tags(&self, prefix: Option<&str>, limit: usize) -> Result<Vec<TagCount>> {
316 let conn = self.inner.conn();
317 let limit = limit.max(1) as i64;
318 let rows: Vec<TagCount> = if let Some(prefix) = prefix {
319 let pattern = format!("{}%", prefix.to_ascii_lowercase());
320 let mut stmt = conn.prepare(
321 "SELECT tag, COUNT(*) FROM note_tags
322 WHERE tag LIKE ?1
323 GROUP BY tag ORDER BY COUNT(*) DESC, tag ASC LIMIT ?2",
324 )?;
325 let rows: Result<Vec<_>> = stmt
326 .query_map(params![pattern, limit], |row| {
327 Ok(TagCount {
328 tag: row.get(0)?,
329 count: row.get(1)?,
330 })
331 })?
332 .collect::<std::result::Result<Vec<_>, _>>()
333 .map_err(LexaError::from);
334 rows?
335 } else {
336 let mut stmt = conn.prepare(
337 "SELECT tag, COUNT(*) FROM note_tags
338 GROUP BY tag ORDER BY COUNT(*) DESC, tag ASC LIMIT ?1",
339 )?;
340 let rows: Result<Vec<_>> = stmt
341 .query_map(params![limit], |row| {
342 Ok(TagCount {
343 tag: row.get(0)?,
344 count: row.get(1)?,
345 })
346 })?
347 .collect::<std::result::Result<Vec<_>, _>>()
348 .map_err(LexaError::from);
349 rows?
350 };
351 Ok(rows)
352 }
353
354 pub fn get_note(&self, note: &str, block: Option<&str>) -> Result<Note> {
355 let resolved = self.resolve_note_argument(note)?;
356 let doc_path = resolved
357 .path
358 .clone()
359 .ok_or_else(|| LexaError::InvalidPath(note.to_string()))?;
360 let bytes = fs::read(&doc_path)?;
361 let text = String::from_utf8_lossy(&bytes).into_owned();
362 let stem = Path::new(&doc_path)
363 .file_stem()
364 .and_then(|s| s.to_str())
365 .unwrap_or_default();
366 let (fm, body_str, _) = frontmatter::parse(&text);
367 let title = frontmatter::resolve_title(&fm, body_str, stem);
368 let body = body_str.to_string();
369
370 let conn = self.inner.conn();
371 let doc_id = self
372 .lookup_doc_id(&doc_path)?
373 .ok_or_else(|| LexaError::InvalidPath(format!("note not indexed: {doc_path}")))?;
374 let tags = self.tags_for_doc(doc_id)?;
375
376 let mut outgoing_stmt = conn.prepare(
377 "SELECT target_name, target_path, header, block_id, alias, kind
378 FROM note_links WHERE src_doc_id = ?1",
379 )?;
380 let outgoing = outgoing_stmt
381 .query_map(params![doc_id], |row| {
382 Ok(LinkRef {
383 target_name: row.get(0)?,
384 target_path: row.get::<_, Option<String>>(1)?,
385 header: row.get::<_, Option<String>>(2)?,
386 block_id: row.get::<_, Option<String>>(3)?,
387 alias: row.get::<_, Option<String>>(4)?,
388 kind: row.get(5)?,
389 })
390 })?
391 .collect::<std::result::Result<Vec<_>, _>>()?;
392
393 let incoming = self.find_backlinks(&doc_path)?;
394
395 let final_body = if let Some(block_id) = block {
396 self.body_for_block(doc_id, &body, block_id)?
397 .unwrap_or(body)
398 } else {
399 body
400 };
401
402 Ok(Note {
403 path: doc_path,
404 title,
405 frontmatter: frontmatter_to_json(&fm),
406 body: final_body,
407 tags,
408 outgoing,
409 incoming,
410 })
411 }
412
413 pub fn get_similar(&self, note: &str, limit: usize) -> Result<Vec<NoteHit>> {
414 let resolved = self.resolve_note_argument(note)?;
415 let doc_path = resolved
416 .path
417 .ok_or_else(|| LexaError::InvalidPath(note.to_string()))?;
418 let bytes = fs::read(&doc_path)?;
419 let text = String::from_utf8_lossy(&bytes).into_owned();
420 let (_, body, _) = frontmatter::parse(&text);
421 let snippet: String = body.chars().take(2_000).collect();
423 let opts = SearchNotesOptions {
424 query: snippet,
425 tier: SearchTier::Fast,
426 limit: limit.saturating_mul(2).max(limit),
427 tags: Vec::new(),
428 folders: Vec::new(),
429 additional_queries: Vec::new(),
430 };
431 let hits = self.search_notes(&opts)?;
432 Ok(hits
433 .into_iter()
434 .filter(|h| h.path != doc_path)
435 .take(limit)
436 .collect())
437 }
438
439 pub fn vault_status(&self) -> Result<VaultStatus> {
440 let stats = self.inner.stats()?;
441 let conn = self.inner.conn();
442 let note_count: i64 = conn
443 .query_row(
444 "SELECT COUNT(*) FROM documents WHERE LOWER(path) LIKE '%.md'",
445 [],
446 |row| row.get(0),
447 )
448 .unwrap_or(0);
449 let tag_count: i64 = conn
450 .query_row("SELECT COUNT(DISTINCT tag) FROM note_tags", [], |row| {
451 row.get(0)
452 })
453 .unwrap_or(0);
454 let link_count: i64 = conn
455 .query_row("SELECT COUNT(*) FROM note_links", [], |row| row.get(0))
456 .unwrap_or(0);
457 let needs_index = note_count == 0;
458 Ok(VaultStatus {
459 stats,
460 vault_root: self.vault_root.clone(),
461 note_count,
462 tag_count,
463 link_count,
464 needs_index,
465 })
466 }
467
468 pub fn purge_vault(&mut self) -> Result<usize> {
469 self.inner.purge_path(self.vault_root.clone())
471 }
472
473 fn markdown_documents(&self) -> Result<Vec<(i64, PathBuf)>> {
476 let mut stmt = self
477 .inner
478 .conn()
479 .prepare("SELECT id, path FROM documents WHERE LOWER(path) LIKE '%.md' ORDER BY id")?;
480 let rows = stmt.query_map([], |row| {
481 let id: i64 = row.get(0)?;
482 let path: String = row.get(1)?;
483 Ok((id, PathBuf::from(path)))
484 })?;
485 rows.collect::<std::result::Result<Vec<_>, _>>()
486 .map_err(LexaError::from)
487 }
488
489 fn refresh_blocks(&self, doc_id: i64) -> Result<usize> {
494 let conn = self.inner.conn();
495 conn.execute("DELETE FROM note_blocks WHERE doc_id = ?1", params![doc_id])?;
496 let mut stmt = conn.prepare("SELECT id, text FROM chunks WHERE doc_id = ?1")?;
497 let rows = stmt
498 .query_map(params![doc_id], |row| {
499 let id: i64 = row.get(0)?;
500 let text: String = row.get(1)?;
501 Ok((id, text))
502 })?
503 .collect::<std::result::Result<Vec<_>, _>>()?;
504 drop(stmt);
505 let mut inserted = 0usize;
506 for (chunk_id, text) in rows {
507 if let Some(block_id) = trailing_block_id(&text) {
508 conn.execute(
509 "INSERT OR IGNORE INTO note_blocks(chunk_id, doc_id, block_id)
510 VALUES(?1, ?2, ?3)",
511 params![chunk_id, doc_id, block_id],
512 )?;
513 inserted += 1;
514 }
515 }
516 Ok(inserted)
517 }
518
519 fn resolve_pending_links(&self) -> Result<()> {
520 let conn = self.inner.conn();
521 let mut stmt = conn.prepare("SELECT path FROM documents WHERE LOWER(path) LIKE '%.md'")?;
522 let mut by_stem: HashMap<String, String> = HashMap::new();
523 for row in stmt.query_map([], |row| row.get::<_, String>(0))? {
524 let path = row?;
525 let stem = file_stem_of(&path).to_ascii_lowercase();
526 by_stem.entry(stem).or_insert(path);
527 }
528 drop(stmt);
529
530 let mut update_stmt = conn.prepare(
531 "UPDATE note_links SET target_path = ?1
532 WHERE LOWER(target_name) = ?2 AND target_path IS NULL",
533 )?;
534 for (stem, path) in &by_stem {
535 update_stmt.execute(params![path, stem])?;
536 }
537 Ok(())
538 }
539
540 fn lookup_doc_id(&self, path: &str) -> Result<Option<i64>> {
541 let row: Option<i64> = self
542 .inner
543 .conn()
544 .query_row(
545 "SELECT id FROM documents WHERE path = ?1",
546 params![path],
547 |row| row.get(0),
548 )
549 .optional()?;
550 Ok(row)
551 }
552
553 fn title_for_doc(&self, doc_id: i64) -> Result<Option<String>> {
554 let row: Option<String> = self
555 .inner
556 .conn()
557 .query_row(
558 "SELECT title FROM note_metadata WHERE doc_id = ?1",
559 params![doc_id],
560 |row| row.get(0),
561 )
562 .optional()?;
563 Ok(row)
564 }
565
566 fn tags_for_doc(&self, doc_id: i64) -> Result<Vec<String>> {
567 let mut stmt = self
568 .inner
569 .conn()
570 .prepare("SELECT tag FROM note_tags WHERE doc_id = ?1 ORDER BY tag")?;
571 let rows = stmt.query_map(params![doc_id], |row| row.get::<_, String>(0))?;
572 Ok(rows.collect::<std::result::Result<Vec<_>, _>>()?)
573 }
574
575 fn body_for_block(
576 &self,
577 doc_id: i64,
578 full_body: &str,
579 block_id: &str,
580 ) -> Result<Option<String>> {
581 let key = block_id.trim_start_matches('^');
582 let mut stmt = self.inner.conn().prepare(
583 "SELECT c.text FROM chunks c
584 JOIN note_blocks b ON b.chunk_id = c.id
585 WHERE b.doc_id = ?1 AND b.block_id = ?2",
586 )?;
587 let row: Option<String> = stmt
588 .query_row(params![doc_id, key], |row| row.get(0))
589 .optional()?;
590 if row.is_some() {
593 return Ok(row);
594 }
595 let needle = format!("^{}", key);
596 if let Some(idx) = full_body.find(&needle) {
597 let start = full_body[..idx].rfind("\n\n").map(|p| p + 2).unwrap_or(0);
599 let end = full_body[idx..]
600 .find("\n\n")
601 .map(|p| idx + p)
602 .unwrap_or(full_body.len());
603 return Ok(Some(full_body[start..end].to_string()));
604 }
605 Ok(None)
606 }
607
608 fn resolve_note_argument(&self, note: &str) -> Result<ResolvedNote> {
609 let candidate = if Path::new(note).is_absolute() {
611 PathBuf::from(note)
612 } else {
613 self.vault_root.join(note)
614 };
615 if candidate.exists() {
616 let canonical = fs::canonicalize(&candidate)?;
617 let path = canonical.to_string_lossy().into_owned();
618 let name = canonical
619 .file_stem()
620 .and_then(|s| s.to_str())
621 .unwrap_or("")
622 .to_string();
623 return Ok(ResolvedNote {
624 path: Some(path),
625 name,
626 });
627 }
628 let stem = Path::new(note)
632 .file_stem()
633 .and_then(|s| s.to_str())
634 .unwrap_or(note)
635 .to_string();
636 let mut stmt = self.inner.conn().prepare(
637 "SELECT path FROM documents WHERE LOWER(path) LIKE '%' || LOWER(?1) || '.md'",
638 )?;
639 let path: Option<String> = stmt.query_row(params![stem], |row| row.get(0)).optional()?;
640 Ok(ResolvedNote { path, name: stem })
641 }
642
643 fn path_passes_folder_filter(&self, path: &str, folders: &[String]) -> bool {
644 if folders.is_empty() {
645 return true;
646 }
647 let path_str = match Path::new(path).strip_prefix(&self.vault_root) {
648 Ok(rel) => rel.to_string_lossy().into_owned(),
649 Err(_) => path.to_string(),
650 };
651 folders
652 .iter()
653 .any(|folder| path_str.starts_with(folder.as_str()))
654 }
655}
656
/// Outcome of `LexaObsidianDb::resolve_note_argument`.
struct ResolvedNote {
    /// Path of the matching file or indexed document; `None` when nothing matched.
    path: Option<String>,
    /// File stem used as the note's link name.
    name: String,
}
661
/// Returns the file stem of `path` as an owned string, or an empty string when
/// the path has no stem or the stem is not valid UTF-8.
fn file_stem_of(path: &str) -> String {
    match Path::new(path).file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_owned(),
        None => String::new(),
    }
}
669
/// Extracts an Obsidian-style block id from the end of `text`.
///
/// Only the last non-blank line is inspected. Its final whitespace-separated
/// token must be `^` followed by one or more ASCII letters, digits, `-` or
/// `_`. The id is returned without the caret; anything else yields `None`.
fn trailing_block_id(text: &str) -> Option<String> {
    let last_line = text.lines().rev().find(|line| !line.trim().is_empty())?;
    let id = last_line.split_whitespace().next_back()?.strip_prefix('^')?;
    let is_valid = !id.is_empty()
        && id
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_');
    if is_valid {
        Some(id.to_string())
    } else {
        None
    }
}
689
690fn frontmatter_to_json(fm: &Frontmatter) -> serde_json::Value {
691 let mut map = serde_json::Map::new();
692 if let Some(title) = &fm.title {
693 map.insert("title".into(), serde_json::Value::String(title.clone()));
694 }
695 if !fm.aliases.is_empty() {
696 map.insert(
697 "aliases".into(),
698 serde_json::Value::Array(
699 fm.aliases
700 .iter()
701 .map(|s| serde_json::Value::String(s.clone()))
702 .collect(),
703 ),
704 );
705 }
706 if !fm.tags.is_empty() {
707 map.insert(
708 "tags".into(),
709 serde_json::Value::Array(
710 fm.tags
711 .iter()
712 .map(|s| serde_json::Value::String(s.clone()))
713 .collect(),
714 ),
715 );
716 }
717 for (k, v) in &fm.raw {
718 map.insert(k.clone(), serde_yaml_to_json(v));
719 }
720 serde_json::Value::Object(map)
721}
722
723fn serde_yaml_to_json(value: &serde_yaml::Value) -> serde_json::Value {
724 match value {
725 serde_yaml::Value::Null => serde_json::Value::Null,
726 serde_yaml::Value::Bool(b) => serde_json::Value::Bool(*b),
727 serde_yaml::Value::Number(n) => {
728 if let Some(i) = n.as_i64() {
729 serde_json::Value::Number(i.into())
730 } else if let Some(f) = n.as_f64() {
731 serde_json::Number::from_f64(f)
732 .map(serde_json::Value::Number)
733 .unwrap_or(serde_json::Value::Null)
734 } else {
735 serde_json::Value::Null
736 }
737 }
738 serde_yaml::Value::String(s) => serde_json::Value::String(s.clone()),
739 serde_yaml::Value::Sequence(seq) => {
740 serde_json::Value::Array(seq.iter().map(serde_yaml_to_json).collect())
741 }
742 serde_yaml::Value::Mapping(m) => {
743 let mut out = serde_json::Map::new();
744 for (k, v) in m {
745 let key = match k {
746 serde_yaml::Value::String(s) => s.clone(),
747 other => serde_yaml::to_string(other)
748 .unwrap_or_default()
749 .trim()
750 .to_string(),
751 };
752 out.insert(key, serde_yaml_to_json(v));
753 }
754 serde_json::Value::Object(out)
755 }
756 serde_yaml::Value::Tagged(tagged) => serde_yaml_to_json(&tagged.value),
757 }
758}
759
760impl std::str::FromStr for LinkKind {
764 type Err = std::convert::Infallible;
765
766 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
767 Ok(match s {
768 "embed" => LinkKind::Embed,
769 _ => LinkKind::Link,
770 })
771 }
772}
773
/// Per-file payload produced by `ObsidianPreprocessor` and consumed by the
/// indexing callback in `index_vault`.
#[derive(Default)]
struct NoteSidecar {
    /// `true` only for `.md` files; the default (non-markdown) payload leaves it `false`.
    is_obsidian_note: bool,
    /// Title resolved by `frontmatter::resolve_title`.
    title: String,
    /// Parsed frontmatter of the note.
    frontmatter: Frontmatter,
    /// Tags returned by `tags::extract`.
    tags: Vec<String>,
    /// Wikilinks returned by `wikilinks::extract`.
    links: Vec<Wikilink>,
}
783
/// Stateless preprocessor that extracts Obsidian metadata from markdown files during indexing.
struct ObsidianPreprocessor;
787
788impl Preprocessor for ObsidianPreprocessor {
789 type Payload = NoteSidecar;
790
791 fn preprocess(
792 &self,
793 path: &Path,
794 bytes: &[u8],
795 ) -> Result<Option<PreprocessOutput<Self::Payload>>> {
796 let is_md = path
797 .extension()
798 .and_then(|e| e.to_str())
799 .map(|e| e.eq_ignore_ascii_case("md"))
800 .unwrap_or(false);
801 if !is_md {
802 return Ok(Some(PreprocessOutput {
803 text: String::from_utf8_lossy(bytes).replace("\r\n", "\n"),
804 payload: NoteSidecar::default(),
805 }));
806 }
807 let text = String::from_utf8_lossy(bytes).replace("\r\n", "\n");
808 let stem = path
809 .file_stem()
810 .and_then(|s| s.to_str())
811 .unwrap_or_default();
812 let (fm, body, _offset) = frontmatter::parse(&text);
813 let title = frontmatter::resolve_title(&fm, body, stem);
814 let extracted_tags = tags::extract(body, &fm);
815 let extracted_links = wikilinks::extract(body);
816 Ok(Some(PreprocessOutput {
817 text: body.to_string(),
818 payload: NoteSidecar {
819 is_obsidian_note: true,
820 title,
821 frontmatter: fm,
822 tags: extracted_tags,
823 links: extracted_links,
824 },
825 }))
826 }
827}
828
829fn write_metadata_tx(
830 tx: &Transaction<'_>,
831 doc_id: i64,
832 title: &str,
833 fm: &Frontmatter,
834) -> lexa_core::Result<()> {
835 let aliases_json =
836 serde_json::to_string(&fm.aliases).map_err(|err| LexaError::Embedding(err.to_string()))?;
837 let raw_yaml = serde_yaml::Value::Mapping(
838 fm.raw
839 .iter()
840 .map(|(k, v)| (serde_yaml::Value::String(k.clone()), v.clone()))
841 .collect(),
842 );
843 let raw_json = serde_json::to_string(&serde_yaml_to_json(&raw_yaml))
844 .map_err(|err| LexaError::Embedding(err.to_string()))?;
845 tx.execute(
846 "INSERT INTO note_metadata(doc_id, title, aliases_json, raw_json)
847 VALUES(?1, ?2, ?3, ?4)
848 ON CONFLICT(doc_id) DO UPDATE SET
849 title = excluded.title,
850 aliases_json = excluded.aliases_json,
851 raw_json = excluded.raw_json",
852 params![doc_id, title, aliases_json, raw_json],
853 )?;
854 Ok(())
855}
856
857fn replace_tags_tx(tx: &Transaction<'_>, doc_id: i64, tags: &[String]) -> lexa_core::Result<()> {
858 tx.execute("DELETE FROM note_tags WHERE doc_id = ?1", params![doc_id])?;
859 for tag in tags {
860 tx.execute(
861 "INSERT OR IGNORE INTO note_tags(doc_id, tag) VALUES(?1, ?2)",
862 params![doc_id, tag],
863 )?;
864 }
865 Ok(())
866}
867
868fn replace_links_tx(
869 tx: &Transaction<'_>,
870 doc_id: i64,
871 links: &[Wikilink],
872) -> lexa_core::Result<()> {
873 tx.execute(
874 "DELETE FROM note_links WHERE src_doc_id = ?1",
875 params![doc_id],
876 )?;
877 for link in links {
878 tx.execute(
879 "INSERT INTO note_links
880 (src_doc_id, target_name, target_path, header, block_id, alias, kind)
881 VALUES(?1, ?2, NULL, ?3, ?4, ?5, ?6)",
882 params![
883 doc_id,
884 link.target_name,
885 link.header,
886 link.block_id,
887 link.alias,
888 link.kind.as_str(),
889 ],
890 )?;
891 }
892 Ok(())
893}
894
#[cfg(test)]
mod tests {
    use super::*;

    /// Happy path: a marker at the end of a single line, and a line without one.
    #[test]
    fn trailing_block_id_extracts_basic() {
        assert_eq!(trailing_block_id("paragraph ^abc-1"), Some("abc-1".into()));
        assert_eq!(trailing_block_id("no marker here"), None);
    }

    /// Blank lines after the marker line are skipped.
    #[test]
    fn trailing_block_id_ignores_trailing_blank_lines() {
        assert_eq!(trailing_block_id("text ^id\n\n   \n"), Some("id".into()));
        assert_eq!(trailing_block_id("^only_id"), Some("only_id".into()));
    }

    /// Only the last non-blank line is inspected.
    #[test]
    fn trailing_block_id_only_checks_last_line() {
        assert_eq!(trailing_block_id("first ^one\nsecond"), None);
    }

    /// Empty input, a bare caret, and ids with other characters are rejected.
    #[test]
    fn trailing_block_id_rejects_malformed_markers() {
        assert_eq!(trailing_block_id(""), None);
        assert_eq!(trailing_block_id("   \n"), None);
        assert_eq!(trailing_block_id("x ^"), None);
        assert_eq!(trailing_block_id("x ^a.b"), None);
    }
}