bear_query/
lib.rs

1//! # bear-query
2//!
3//! A completely read-only, minimal-contention library for querying Bear app's SQLite database.
4//!
5//! ## Safety Guarantees
6//!
7//! This library implements multiple layers of protection to ensure minimal interference with Bear:
8//!
9//! 1. **Read-Only File Access**: Opens with `SQLITE_OPEN_READ_ONLY`
10//! 2. **No Internal Locks**: Uses `SQLITE_OPEN_NO_MUTEX` to minimize lock contention
11//! 3. **Query-Only Mode**: Enforces `PRAGMA query_only = ON` at SQLite level
12//! 4. **Short-Lived Connections**: Connections are only open during each query
13//! 5. **Busy Timeout**: 5000ms timeout handles database contention gracefully
14//!
15//! ## How It Works
16//!
17//! Bear does **not** use SQLite's WAL (Write-Ahead Logging) mode by default. To minimize
18//! interference, this library uses short-lived connections that are opened only when needed
19//! and closed immediately after use.
20//!
21//! Each method call:
22//! - Opens a read-only connection with a 5000ms busy timeout
23//! - Executes the query
24//! - Automatically closes the connection
25//!
26//! This approach ensures minimal lock contention with Bear's write operations.
27//!
28//! ## Normalized Schema
29//!
30//! This library automatically normalizes Bear's Core Data schema through Common Table Expressions (CTEs).
31//! All queries (both typed methods and the generic `query()` API) have access to these normalized views:
32//!
33//! ### `notes` Table
34//!
35//! The normalized view of all notes in Bear.
36//!
37//! | Column | Type | Description |
38//! |--------|------|-------------|
39//! | `id` | INTEGER | Note's primary key |
40//! | `unique_id` | TEXT | Bear's UUID for the note |
41//! | `title` | TEXT | Note title |
42//! | `content` | TEXT | Full note content (Markdown) |
43//! | `modified` | DATETIME | Last modification timestamp (converted from Core Data epoch) |
44//! | `created` | DATETIME | Creation timestamp (converted from Core Data epoch) |
45//! | `is_pinned` | INTEGER | 1 if pinned, 0 otherwise |
46//! | `is_trashed` | INTEGER | 1 if in trash, 0 otherwise |
47//! | `is_archived` | INTEGER | 1 if archived, 0 otherwise |
48//!
49//! ### `tags` Table
50//!
51//! The normalized view of all tags.
52//!
53//! | Column | Type | Description |
54//! |--------|------|-------------|
55//! | `id` | INTEGER | Tag's primary key |
56//! | `name` | TEXT | Tag name (e.g., "work/projects") |
57//! | `modified` | DATETIME | Last modification timestamp |
58//!
59//! ### `note_tags` Table
60//!
61//! Junction table linking notes to their tags (many-to-many relationship).
62//!
63//! | Column | Type | Description |
64//! |--------|------|-------------|
65//! | `note_id` | INTEGER | Foreign key to notes.id |
66//! | `tag_id` | INTEGER | Foreign key to tags.id |
67//!
68//! ### `note_links` Table
69//!
70//! Links between notes (bidirectional wiki-style links).
71//!
72//! | Column | Type | Description |
73//! |--------|------|-------------|
74//! | `from_note_id` | INTEGER | Source note ID |
75//! | `to_note_id` | INTEGER | Target note ID |
76//!
77//! ### Core Data Epoch Conversion
78//!
79//! Bear uses Apple's Core Data timestamp format (seconds since 2001-01-01 00:00:00 UTC).
80//! This library automatically converts all timestamps to standard SQLite datetime format.
81//!
82//! The conversion is done via a CTE: `unixepoch('2001-01-01')`
83//!
84//! ### Schema Discovery
85//!
86//! The library discovers variable schema elements at initialization:
87//! - Junction table column names (e.g., `Z_5NOTES`, `Z_13TAGS`)
88//! - These numbers may vary across Bear versions
89//!
90//! For full schema details, see the `SCHEMA.md` documentation file.
91//!
92//! ## Example
93//!
94//! ```no_run
95//! use bear_query::{BearDb, NotesQuery};
96//!
97//! # fn main() -> Result<(), bear_query::BearError> {
98//! // Create a handle (no connection opened yet)
99//! let db = BearDb::new()?;
100//!
101//! // Each method opens a connection, queries, and closes
102//! let all_tags = db.tags()?;
103//! let recent_notes = db.notes(NotesQuery::default())?;
104//!
105//! for note in recent_notes {
106//!     println!("{}", note.title());
107//! }
108//! # Ok(())
109//! # }
110//! ```
111
112mod dataframe;
113mod schema;
114
115pub use polars::prelude as polars_prelude;
116
117use polars::prelude::*;
118use rusqlite::types::{FromSql, FromSqlResult, ToSqlOutput, ValueRef};
119use rusqlite::{Connection, OpenFlags, Row, ToSql};
120use std::collections::{HashMap, HashSet};
121use std::path::PathBuf;
122use std::time::Duration;
123use time::OffsetDateTime;
124
125use dataframe::query_to_dataframe;
126
127/// Specifies the database location for BearDb.
128///
129/// For production code, use RealPath to connect to Bear's database.
130/// For tests, use InMemory to create an isolated test database.
131#[derive(Debug, Clone)]
132pub enum DatabasePath {
133  /// Path to Bear's actual database file
134  RealPath(PathBuf),
135  /// In-memory database for testing (only available with cfg(test))
136  #[cfg(test)]
137  InMemory,
138}
139
140impl DatabasePath {
141  /// Opens a connection based on the database path type.
142  /// For RealPath: opens with read-only flags and safety pragmas
143  /// For InMemory: creates an in-memory database with test schema
144  fn open_connection(&self) -> Result<Connection, BearError> {
145    match self {
146      DatabasePath::RealPath(path) => {
147        // Open with maximum read-only protection:
148        // - SQLITE_OPEN_READ_ONLY: Opens in read-only mode
149        // - SQLITE_OPEN_NO_MUTEX: Disables internal mutexes (safe for single-threaded read-only)
150        let conn = Connection::open_with_flags(
151          path,
152          OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_NO_MUTEX,
153        )?;
154
155        // Set busy timeout to handle database contention
156        conn.busy_timeout(Duration::from_millis(5000))?;
157
158        // Enable query_only mode as additional safety
159        conn.pragma_update(None, "query_only", "ON")?;
160
161        Ok(conn)
162      }
163      #[cfg(test)]
164      DatabasePath::InMemory => {
165        let conn = Connection::open_in_memory()?;
166        schema::setup_test_schema(&conn)?;
167        Ok(conn)
168      }
169    }
170  }
171}
172
173#[derive(Debug, thiserror::Error)]
174pub enum BearError {
175  #[error("Unable to load users home directory")]
176  NoHomeDirectory,
177  #[error("SQL Error: {source}")]
178  SqlError {
179    #[from]
180    source: rusqlite::Error,
181  },
182  #[error("Polars Error: {source}")]
183  PolarsError {
184    #[from]
185    source: PolarsError,
186  },
187}
188
189/// Query options for filtering notes
190#[derive(Debug, Clone)]
191pub struct NotesQuery {
192  limit: Option<u32>,
193  include_trashed: bool,
194  include_archived: bool,
195}
196
197impl Default for NotesQuery {
198  fn default() -> Self {
199    Self {
200      limit: Some(10),
201      include_trashed: false,
202      include_archived: false,
203    }
204  }
205}
206
207impl NotesQuery {
208  /// Create a new NotesQuery with default settings (limit: 10, exclude trashed and archived)
209  pub fn new() -> Self {
210    Self::default()
211  }
212
213  /// Set a limit on the number of notes to return
214  pub fn limit(
215    mut self,
216    limit: u32,
217  ) -> Self {
218    self.limit = Some(limit);
219    self
220  }
221
222  /// Remove the limit and return all matching notes
223  pub fn no_limit(mut self) -> Self {
224    self.limit = None;
225    self
226  }
227
228  /// Include trashed notes in results
229  pub fn include_trashed(mut self) -> Self {
230    self.include_trashed = true;
231    self
232  }
233
234  /// Include archived notes in results
235  pub fn include_archived(mut self) -> Self {
236    self.include_archived = true;
237    self
238  }
239
240  /// Include both trashed and archived notes in results
241  pub fn include_all(mut self) -> Self {
242    self.include_trashed = true;
243    self.include_archived = true;
244    self
245  }
246}
247
248/// Handle to Bear's database. All operations use short-lived connections internally.
249pub struct BearDb {
250  db_path: DatabasePath,
251  _metadata: schema::BearDbMetadata,
252  normalizing_cte: String,
253}
254
255impl BearDb {
256  /// Create a new BearDb handle. Opens a temporary connection to discover schema metadata,
257  /// generates normalizing CTEs, then closes the connection.
258  pub fn new() -> Result<Self, BearError> {
259    let home_dir = dirs::home_dir().ok_or(BearError::NoHomeDirectory)?;
260
261    let db_path = home_dir.join(
262      "Library/Group Containers/9K33E3U3T4.net.shinyfrog.bear/Application Data/database.sqlite",
263    );
264
265    Self::new_with_path(DatabasePath::RealPath(db_path))
266  }
267
268  /// Create a new BearDb handle with a specific database path.
269  /// This is primarily for testing with in-memory databases.
270  pub(crate) fn new_with_path(db_path: DatabasePath) -> Result<Self, BearError> {
271    // Open temporary connection to discover metadata
272    let connection = db_path.open_connection()?;
273
274    // Discover schema metadata
275    let metadata = schema::discover_metadata(&connection)?;
276
277    // Generate normalizing CTE based on discovered metadata
278    let normalizing_cte = schema::generate_normalizing_cte(&metadata);
279
280    // Connection is dropped here, closing it
281    drop(connection);
282
283    Ok(BearDb {
284      db_path,
285      _metadata: metadata,
286      normalizing_cte,
287    })
288  }
289
290  /// Opens a short-lived connection, wraps it in a Queryable with normalizing CTEs,
291  /// executes the closure, and closes the connection.
292  fn with_connection<F, R>(
293    &self,
294    f: F,
295  ) -> Result<R, BearError>
296  where
297    F: FnOnce(&Queryable) -> Result<R, BearError>,
298  {
299    // Open connection using DatabasePath's connection handler
300    let connection = self.db_path.open_connection()?;
301
302    // Create Queryable wrapper with normalizing CTE
303    let queryable = Queryable::new(&connection, &self.normalizing_cte);
304
305    // Execute the closure with the queryable
306    // Connection will be automatically closed when it goes out of scope
307    f(&queryable)
308  }
309
310  /// Retrieves all tags from Bear
311  pub fn tags(&self) -> Result<BearTags, BearError> {
312    self.with_connection(|queryable| {
313      let mut statement = queryable.prepare(
314        r"
315      SELECT
316        id,
317        name,
318        modified
319      FROM tags
320      ORDER BY name ASC",
321      )?;
322
323      let results: rusqlite::Result<Vec<BearTag>> = statement
324        .query_map([], |row| {
325          Ok(BearTag {
326            id: row.get("id")?,
327            name: row.get("name")?,
328            modified: row.get("modified")?,
329          })
330        })?
331        .collect();
332
333      let tags = results?.into_iter().map(|tag| (tag.id, tag)).collect();
334
335      Ok(BearTags { tags })
336    })
337  }
338
339  /// Retrieves notes from Bear, ordered by most recently modified.
340  ///
341  /// # Examples
342  /// ```no_run
343  /// # use bear_query::{BearDb, NotesQuery};
344  /// # fn main() -> Result<(), bear_query::BearError> {
345  /// let db = BearDb::new()?;
346  ///
347  /// // Get 10 most recent notes (default)
348  /// let notes = db.notes(NotesQuery::default())?;
349  ///
350  /// // Get 20 most recent notes
351  /// let notes = db.notes(NotesQuery::new().limit(20))?;
352  ///
353  /// // Get all notes including trashed and archived
354  /// let notes = db.notes(NotesQuery::new().no_limit().include_all())?;
355  /// # Ok(())
356  /// # }
357  /// ```
358  pub fn notes(
359    &self,
360    query: NotesQuery,
361  ) -> Result<Vec<BearNote>, BearError> {
362    self.with_connection(|queryable| {
363      // Build WHERE clause based on query options
364      let mut where_clauses = Vec::new();
365      if !query.include_trashed {
366        where_clauses.push("is_trashed <> 1");
367      }
368      if !query.include_archived {
369        where_clauses.push("is_archived <> 1");
370      }
371
372      let where_clause = if where_clauses.is_empty() {
373        String::new()
374      } else {
375        format!("WHERE {}", where_clauses.join(" AND "))
376      };
377
378      let limit_clause = query
379        .limit
380        .map(|l| format!("LIMIT {}", l))
381        .unwrap_or_default();
382
383      let query = format!(
384        r"
385      SELECT
386        id,
387        unique_id,
388        title,
389        content,
390        modified,
391        created,
392        is_pinned
393      FROM notes
394      {}
395      ORDER BY modified DESC
396      {}",
397        where_clause, limit_clause
398      );
399
400      let mut statement = queryable.prepare(&query)?;
401
402      let results: rusqlite::Result<Vec<BearNote>> =
403        statement.query_map([], note_from_row)?.collect();
404
405      Ok(results?)
406    })
407  }
408
409  /// Retrieves all notes linked from the specified note
410  pub fn note_links(
411    &self,
412    from: BearNoteId,
413  ) -> Result<Vec<BearNote>, BearError> {
414    self.with_connection(|queryable| {
415      let mut statement = queryable.prepare(
416        r"
417      SELECT
418        n.id,
419        n.unique_id,
420        n.title,
421        n.content,
422        n.modified,
423        n.created,
424        n.is_pinned
425      FROM notes as n
426      INNER JOIN note_links as nl ON nl.to_note_id = n.id
427      WHERE n.is_trashed <> 1 AND n.is_archived <> 1 AND nl.from_note_id = ?
428      ORDER BY n.modified DESC",
429      )?;
430
431      let results: rusqlite::Result<Vec<BearNote>> =
432        statement.query_map([from], note_from_row)?.collect();
433
434      Ok(results?)
435    })
436  }
437
438  /// Retrieves all tag IDs associated with the specified note
439  pub fn note_tags(
440    &self,
441    from: BearNoteId,
442  ) -> Result<HashSet<BearTagId>, BearError> {
443    self.with_connection(|queryable| {
444      let mut statement = queryable.prepare(
445        r"
446      SELECT
447        tag_id
448      FROM note_tags
449      WHERE note_id = ?",
450      )?;
451
452      let results: rusqlite::Result<HashSet<BearTagId>> = statement
453        .query_map([from], |row| row.get("tag_id"))?
454        .collect();
455
456      Ok(results?)
457    })
458  }
459
460  /// Execute a generic SQL SELECT query and return results as a Polars DataFrame.
461  ///
462  /// The query automatically has the normalizing CTEs prepended, so you can query
463  /// against clean table names: `notes`, `tags`, `note_tags`, `note_links`.
464  ///
465  /// # Safety
466  /// This method trusts the read-only connection flags to prevent writes. Only SELECT
467  /// queries should be used, though this is not enforced by the library.
468  ///
469  /// # Examples
470  /// ```no_run
471  /// # use bear_query::BearDb;
472  /// # fn main() -> Result<(), bear_query::BearError> {
473  /// let db = BearDb::new()?;
474  ///
475  /// // Query normalized tables
476  /// let df = db.query("SELECT title, modified FROM notes LIMIT 5")?;
477  ///
478  /// // Join tables
479  /// let df = db.query(r"
480  ///   SELECT n.title, t.name as tag_name
481  ///   FROM notes n
482  ///   JOIN note_tags nt ON n.id = nt.note_id
483  ///   JOIN tags t ON nt.tag_id = t.id
484  ///   WHERE n.is_trashed = 0
485  ///   LIMIT 10
486  /// ")?;
487  ///
488  /// println!("{}", df);
489  /// # Ok(())
490  /// # }
491  /// ```
492  pub fn query(
493    &self,
494    sql: &str,
495  ) -> Result<DataFrame, BearError> {
496    self.with_connection(|queryable| query_to_dataframe(queryable, sql))
497  }
498}
499
500/// A wrapper around a database connection that automatically applies normalizing CTEs to queries.
501/// This abstracts away Bear's Core Data quirks (Z_ prefixes, numbered columns, epoch timestamps).
502pub struct Queryable<'a> {
503  conn: &'a Connection,
504  normalizing_cte: &'a str,
505}
506
507impl<'a> Queryable<'a> {
508  /// Creates a new Queryable from a connection and pre-generated CTE string
509  fn new(
510    conn: &'a Connection,
511    normalizing_cte: &'a str,
512  ) -> Self {
513    Self {
514      conn,
515      normalizing_cte,
516    }
517  }
518
519  /// Test-only constructor for creating Queryable in tests
520  ///
521  /// This is pub(crate) so tests in other modules can create Queryables
522  #[cfg(test)]
523  pub(crate) fn new_for_test(
524    conn: &'a Connection,
525    normalizing_cte: &'a str,
526  ) -> Self {
527    Self::new(conn, normalizing_cte)
528  }
529
530  /// Prepares a statement with the normalizing CTE automatically prepended.
531  /// The user's SQL should query against normalized table names (notes, tags, note_tags, note_links).
532  pub fn prepare(
533    &self,
534    user_sql: &str,
535  ) -> rusqlite::Result<rusqlite::Statement<'a>> {
536    let full_sql = format!("{}\n{}", self.normalizing_cte, user_sql);
537    self.conn.prepare(&full_sql)
538  }
539}
540
541#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
542pub struct DbId(i64);
543
544#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
545pub struct BearNoteId(DbId);
546
547#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
548pub struct BearTagId(DbId);
549
550impl FromSql for DbId {
551  fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
552    Ok(Self(value.as_i64()?))
553  }
554}
555
556impl FromSql for BearNoteId {
557  fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
558    Ok(Self(FromSql::column_result(value)?))
559  }
560}
561
562impl FromSql for BearTagId {
563  fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
564    Ok(Self(FromSql::column_result(value)?))
565  }
566}
567
568impl ToSql for DbId {
569  fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
570    self.0.to_sql()
571  }
572}
573
574impl ToSql for BearNoteId {
575  fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
576    self.0.to_sql()
577  }
578}
579
580#[derive(Debug, Clone)]
581pub struct BearTag {
582  id: BearTagId,
583  name: String,
584  modified: Option<OffsetDateTime>,
585}
586
587impl BearTag {
588  pub fn id(&self) -> BearTagId {
589    self.id
590  }
591
592  pub fn name(&self) -> &str {
593    &self.name
594  }
595
596  pub fn modified(&self) -> Option<OffsetDateTime> {
597    self.modified
598  }
599}
600
601#[derive(Debug)]
602pub struct BearTags {
603  tags: HashMap<BearTagId, BearTag>,
604}
605
606impl BearTags {
607  pub fn get(
608    &self,
609    tag_id: &BearTagId,
610  ) -> Option<&BearTag> {
611    self.tags.get(tag_id)
612  }
613
614  pub fn count(&self) -> usize {
615    self.tags.len()
616  }
617
618  pub fn iter(&self) -> impl Iterator<Item = &BearTag> {
619    self.tags.values()
620  }
621
622  pub fn names(
623    &self,
624    tag_ids: &HashSet<BearTagId>,
625  ) -> HashSet<String> {
626    tag_ids
627      .iter()
628      .filter_map(|id| self.get(id).map(|t| t.name.clone()))
629      .collect()
630  }
631}
632
633#[derive(Debug)]
634pub struct BearNote {
635  id: BearNoteId,
636  unique_id: String,
637  title: String,
638  content: String,
639  modified: OffsetDateTime,
640  created: OffsetDateTime,
641  is_pinned: bool,
642}
643
644impl BearNote {
645  pub fn id(&self) -> BearNoteId {
646    self.id
647  }
648
649  pub fn unique_id(&self) -> &str {
650    &self.unique_id
651  }
652
653  pub fn title(&self) -> &str {
654    &self.title
655  }
656
657  pub fn content(&self) -> &str {
658    &self.content
659  }
660
661  pub fn modified(&self) -> OffsetDateTime {
662    self.modified
663  }
664
665  pub fn created(&self) -> OffsetDateTime {
666    self.created
667  }
668
669  pub fn is_pinned(&self) -> bool {
670    self.is_pinned
671  }
672}
673
674fn note_from_row(row: &Row) -> rusqlite::Result<BearNote> {
675  Ok(BearNote {
676    id: row.get("id")?,
677    unique_id: row.get("unique_id")?,
678    title: row.get("title")?,
679    content: row.get("content")?,
680    created: row.get("created")?,
681    modified: row.get("modified")?,
682    is_pinned: row.get("is_pinned")?,
683  })
684}
685
686#[cfg(test)]
687mod tests {
688  use super::*;
689
690  /// Integration test demonstrating BearDb with in-memory database
691  #[test]
692  fn test_beardb_with_inmemory() {
693    // Create a BearDb with an in-memory database (automatically sets up schema)
694    let db = BearDb::new_with_path(DatabasePath::InMemory).unwrap();
695
696    // Test the typed API
697    let tags = db.tags().unwrap();
698    assert_eq!(tags.count(), 2); // Should have 2 tags from test data
699
700    let notes = db.notes(NotesQuery::default()).unwrap();
701    assert_eq!(notes.len(), 2); // default() excludes trashed, so 2 notes
702
703    // Test filtering - include all notes
704    let all_notes = db
705      .notes(NotesQuery::new().include_all().no_limit())
706      .unwrap();
707    assert_eq!(all_notes.len(), 3); // 3 notes total including trashed
708
709    // Test the generic SQL query API
710    let df = db
711      .query("SELECT id, title FROM notes WHERE is_trashed = 0")
712      .unwrap();
713    assert_eq!(df.height(), 2); // 2 non-trashed notes
714    assert_eq!(df.width(), 2); // 2 columns (id, title)
715
716    // Test aggregation
717    let df = db.query("SELECT COUNT(*) as count FROM notes").unwrap();
718    assert_eq!(df.height(), 1);
719    assert_eq!(df.width(), 1);
720
721    // Verify the count column is an integer (not string)
722    let series = df.column("count").unwrap();
723    let value = series.get(0).unwrap();
724    match value {
725      AnyValue::Int64(n) => assert_eq!(n, 3),
726      _ => panic!("Expected Int64, got: {:?}", value),
727    }
728
729    // Test join query
730    let df = db
731      .query(
732        r"
733      SELECT n.title, t.name as tag_name
734      FROM notes n
735      JOIN note_tags nt ON n.id = nt.note_id
736      JOIN tags t ON nt.tag_id = t.id
737    ",
738      )
739      .unwrap();
740    assert_eq!(df.height(), 2); // 2 note-tag relationships
741  }
742}