toondb/
lib.rs

1// Copyright 2025 Sushanth (https://github.com/sushanthpy)
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
//! # ToonDB Client SDK
//!
//! LLM-optimized database client with 40-66% token savings vs JSON.
//!
//! ## Key Features
//!
//! - **Path-based access**: O(|path|) resolution independent of data size
//! - **Token-efficient**: TOON format uses 40-66% fewer tokens than JSON
//! - **ACID transactions**: Full MVCC with snapshot isolation
//! - **Vector search**: Scale-aware backend (HNSW for small collections, Vamana+PQ for large ones)
//! - **Columnar storage**: 80% I/O reduction via projection pushdown
26//!
//! ## Connection Types
//!
//! The SDK provides two connection types:
//!
//! - **`Connection`** (alias for `DurableConnection`): Production-grade with WAL durability,
//!   MVCC transactions, and crash recovery. **Use this for production.**
//!
//! - **`InMemoryConnection`** (alias for `ToonConnection`): Fast in-memory storage for testing.
//!   Data is not persisted. **Use only for tests or ephemeral data.**
36//!
//! ## Quick Start
//!
//! ```rust,ignore
//! use toondb::prelude::*;
//!
//! // Open a durable connection (default - uses WAL for persistence)
//! let conn = Connection::open("./data")?;
//!
//! // Or for testing, use in-memory
//! let test_conn = InMemoryConnection::open("./test_data")?;
//!
//! // Path queries with TOON output go through a `ToonClient`
//! let client = ToonClient::open("./client_data")?;
//!
//! // Query with TOON output (66% fewer tokens)
//! let result = client.query("users")
//!     .filter("score", Gt, 80)
//!     .limit(100)
//!     .to_toon()?;
//!
//! println!("Tokens: {}", result.metrics().toon_tokens);
//! println!("Savings: {:.1}%", result.metrics().token_savings_percent());
//! ```
57//!
//! ## CONTEXT SELECT for LLM Context
//!
//! ```rust,ignore
//! let context = client.context_query()
//!     .from_session("session_id")
//!     .with_token_limit(4000)
//!     .user_section(|s| s.columns(&["query", "preferences"]).priority(1))
//!     .history_section(|s| s.columns(&["recent"]).priority(2))
//!     .execute()?;
//! ```
68
69pub mod batch;
70pub mod column_access;
71pub mod connection;
72pub mod context_query;
73pub mod crud;
74pub mod error;
75pub mod path_query;
76pub mod query;
77pub mod recovery;
78pub mod result;
79pub mod schema;
80pub mod storage;
81pub mod transaction;
82pub mod vectors;
83
84// Primary connection API - DurableConnection is the default
85pub use connection::DurableConnection;
86/// Type alias for the default connection - uses durable storage with WAL
87pub type Connection = DurableConnection;
88
89/// Type alias for Database (for users expecting `use toondb::Database`)
90/// This is the same as `Connection` - a durable database connection.
91pub type Database = DurableConnection;
92
93// For backwards compatibility and testing
94pub use connection::ToonConnection;
95/// Alias for in-memory connection (for testing)
96pub type InMemoryConnection = ToonConnection;
97
98pub use batch::{BatchOp, BatchResult, BatchWriter};
99pub use column_access::{ColumnView, TypedColumn};
100#[cfg(feature = "embedded")]
101pub use connection::EmbeddedConnection;
102pub use connection::{ConnectionConfig, DurableStats, RecoveryResult, SyncModeClient};
103pub use context_query::{ContextQueryBuilder, ContextQueryResult, SectionBuilder, SectionContent};
104pub use crud::{DeleteResult, InsertResult, RowBuilder, UpdateResult};
105pub use path_query::PathQuery;
106pub use result::{ResultMetrics, ToonResult};
107pub use schema::{SchemaBuilder, TableDescription};
108pub use transaction::{ClientTransaction, IsolationLevel, SnapshotReader};
109pub use vectors::{SearchResult, VectorCollection};
110// Re-export deprecated GroupCommitBuffer with warning
111#[allow(deprecated)]
112pub use batch::{GroupCommitBuffer, GroupCommitConfig};
113pub use error::ClientError;
114pub use query::{QueryExecutor, QueryResult};
115pub use recovery::{CheckpointResult, RecoveryManager, RecoveryStatus, WalVerificationResult};
116
117// Re-export columnar query result from storage layer
118pub use toondb_storage::ColumnarQueryResult;
119
120use std::path::Path;
121use std::sync::Arc;
122
123/// ToonDB Client - LLM-optimized database access
124///
125/// # Token Efficiency
126///
127/// TOON format achieves 40-66% token reduction vs JSON:
128/// - JSON: `{"field1": "val1", "field2": "val2"}`
129/// - TOON: `table[N]{f1,f2}: v1,v2`
130///
131/// For 100 rows × 5 fields:
132/// - JSON: ~7,500 tokens
133/// - TOON: ~2,550 tokens (66% savings)
134pub struct ToonClient {
135    connection: Arc<ToonConnection>,
136    config: ClientConfig,
137}
138
139/// Client configuration
140#[derive(Debug, Clone)]
141pub struct ClientConfig {
142    /// Maximum tokens per response (for LLM context management)
143    pub token_budget: Option<usize>,
144    /// Enable streaming output
145    pub streaming: bool,
146    /// Default output format
147    pub output_format: OutputFormat,
148    /// Connection pool size
149    pub pool_size: usize,
150}
151
/// Output format selection
///
/// [`OutputFormat::Toon`] is the default variant, matching the value used by
/// the default client configuration.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OutputFormat {
    /// TOON format (default, 40-66% fewer tokens)
    #[default]
    Toon,
    /// JSON (for compatibility)
    Json,
    /// Raw columnar (for analytics)
    Columnar,
}
162
163impl Default for ClientConfig {
164    fn default() -> Self {
165        Self {
166            token_budget: None,
167            streaming: false,
168            output_format: OutputFormat::Toon,
169            pool_size: 10,
170        }
171    }
172}
173
174impl ToonClient {
175    /// Open database at path
176    pub fn open(path: impl AsRef<Path>) -> Result<Self, ClientError> {
177        let connection = ToonConnection::open(path)?;
178        Ok(Self {
179            connection: Arc::new(connection),
180            config: ClientConfig::default(),
181        })
182    }
183
184    /// Open with custom configuration
185    pub fn open_with_config(
186        path: impl AsRef<Path>,
187        config: ClientConfig,
188    ) -> Result<Self, ClientError> {
189        let connection = ToonConnection::open(path)?;
190        Ok(Self {
191            connection: Arc::new(connection),
192            config,
193        })
194    }
195
196    /// Set token budget for responses
197    pub fn with_token_budget(mut self, budget: usize) -> Self {
198        self.config.token_budget = Some(budget);
199        self
200    }
201
202    /// Start a path-based query (ToonDB's unique access pattern)
203    /// O(|path|) resolution, not O(N) scan
204    pub fn query(&self, path: &str) -> PathQuery<'_> {
205        PathQuery::from_path(&self.connection, path)
206    }
207
208    /// Access vector collection
209    pub fn vectors(&self, name: &str) -> Result<VectorCollection, ClientError> {
210        VectorCollection::open(&self.connection, name)
211    }
212
213    /// Begin transaction with default isolation (snapshot)
214    pub fn begin(&self) -> Result<ClientTransaction<'_>, ClientError> {
215        ClientTransaction::begin(&self.connection, IsolationLevel::SnapshotIsolation)
216    }
217
218    /// Begin transaction with specified isolation level
219    pub fn begin_with_isolation(
220        &self,
221        isolation: IsolationLevel,
222    ) -> Result<ClientTransaction<'_>, ClientError> {
223        ClientTransaction::begin(&self.connection, isolation)
224    }
225
226    /// Create a read-only snapshot at current time
227    pub fn snapshot(&self) -> Result<SnapshotReader<'_>, ClientError> {
228        SnapshotReader::now(&self.connection)
229    }
230
231    /// Execute raw TOON-QL query
232    pub fn execute(&self, sql: &str) -> Result<QueryResult, ClientError> {
233        self.connection.query_sql(sql)
234    }
235
236    /// Get connection for direct access
237    pub fn connection(&self) -> &ToonConnection {
238        &self.connection
239    }
240
241    /// Get client statistics
242    pub fn stats(&self) -> ClientStats {
243        self.connection.stats()
244    }
245
246    /// Get token budget
247    pub fn token_budget(&self) -> Option<usize> {
248        self.config.token_budget
249    }
250
251    /// Get output format
252    pub fn output_format(&self) -> OutputFormat {
253        self.config.output_format
254    }
255}
256
// ============================================================================
// DurableToonClient - WAL-backed ToonClient
// ============================================================================
260
/// Durable ToonClient backed by `EmbeddedConnection` with WAL/MVCC
///
/// Unlike `ToonClient` which uses in-memory `ToonConnection`, this uses
/// `EmbeddedConnection` which wraps the full Database kernel with:
/// - Write-Ahead Logging (WAL) for durability
/// - MVCC with SSI for proper transaction isolation
/// - Crash recovery
///
/// Use this for production workloads requiring ACID guarantees.
///
/// Only compiled when the `embedded` feature is enabled.
#[cfg(feature = "embedded")]
pub struct DurableToonClient {
    /// Shared handle to the embedded connection.
    connection: Arc<EmbeddedConnection>,
    /// Settings applied to this client (token budget, output format, ...).
    config: ClientConfig,
}
275
#[cfg(feature = "embedded")]
impl DurableToonClient {
    /// Open a durable database at `path` with [`ClientConfig::default`].
    ///
    /// # Errors
    ///
    /// Propagates any failure from [`EmbeddedConnection::open`] as a [`ClientError`].
    pub fn open(path: impl AsRef<Path>) -> Result<Self, ClientError> {
        let connection = EmbeddedConnection::open(path)?;
        // Reuse the shared constructor so the default-config wiring lives in one place.
        Ok(Self::from_connection(Arc::new(connection)))
    }

    /// Create a client from an existing shared connection, using
    /// [`ClientConfig::default`].
    pub fn from_connection(connection: Arc<EmbeddedConnection>) -> Self {
        Self {
            connection,
            config: ClientConfig::default(),
        }
    }

    /// Open a durable database at `path` with a custom client configuration
    /// and a custom storage-layer configuration.
    ///
    /// `config` is stored on the client; `db_config` is handed to
    /// [`EmbeddedConnection::open_with_config`].
    ///
    /// # Errors
    ///
    /// Propagates any failure from [`EmbeddedConnection::open_with_config`].
    pub fn open_with_config(
        path: impl AsRef<Path>,
        config: ClientConfig,
        db_config: toondb_storage::database::DatabaseConfig,
    ) -> Result<Self, ClientError> {
        let connection = EmbeddedConnection::open_with_config(path, db_config)?;
        Ok(Self {
            connection: Arc::new(connection),
            config,
        })
    }

    /// Set the token budget for responses, builder-style.
    ///
    /// Consumes the client and returns it with `token_budget` set to
    /// `Some(budget)`.
    pub fn with_token_budget(mut self, budget: usize) -> Self {
        self.config.token_budget = Some(budget);
        self
    }

    /// Begin a transaction on the underlying connection.
    pub fn begin(&self) -> Result<(), ClientError> {
        self.connection.begin()
    }

    /// Commit the active transaction.
    ///
    /// Returns the `u64` produced by the connection's `commit`
    /// (NOTE(review): presumably a commit timestamp or sequence number —
    /// confirm against `EmbeddedConnection`).
    pub fn commit(&self) -> Result<u64, ClientError> {
        self.connection.commit()
    }

    /// Abort the active transaction.
    pub fn abort(&self) -> Result<(), ClientError> {
        self.connection.abort()
    }

    /// Put bytes at a path.
    pub fn put(&self, path: &str, value: &[u8]) -> Result<(), ClientError> {
        self.connection.put(path, value)
    }

    /// Get the bytes stored at a path.
    ///
    /// The `Option` comes straight from the connection
    /// (NOTE(review): presumably `None` means the path is absent — confirm).
    pub fn get(&self, path: &str) -> Result<Option<Vec<u8>>, ClientError> {
        self.connection.get(path)
    }

    /// Delete a path.
    pub fn delete(&self, path: &str) -> Result<(), ClientError> {
        self.connection.delete(path)
    }

    /// Scan paths with the given prefix, returning `(path, value)` pairs.
    pub fn scan(&self, prefix: &str) -> Result<Vec<(String, Vec<u8>)>, ClientError> {
        self.connection.scan(prefix)
    }

    /// Get database statistics as reported by the underlying connection.
    pub fn stats(&self) -> ClientStats {
        self.connection.stats()
    }

    /// Force an fsync via the underlying connection.
    pub fn fsync(&self) -> Result<(), ClientError> {
        self.connection.fsync()
    }

    /// Borrow the underlying connection.
    pub fn connection(&self) -> &EmbeddedConnection {
        &self.connection
    }

    /// Get the configured token budget, if any.
    pub fn token_budget(&self) -> Option<usize> {
        self.config.token_budget
    }

    /// Get the configured output format.
    pub fn output_format(&self) -> OutputFormat {
        self.config.output_format
    }
}
374
/// Client statistics
///
/// A plain data snapshot returned by the clients' `stats()` methods.
#[derive(Debug, Clone)]
pub struct ClientStats {
    /// Total queries executed
    pub queries_executed: u64,
    /// Total TOON tokens emitted
    pub toon_tokens_emitted: u64,
    /// Equivalent JSON tokens
    pub json_tokens_equivalent: u64,
    /// Token savings percentage
    pub token_savings_percent: f64,
    /// Cache hit rate
    pub cache_hit_rate: f64,
}
389
390/// Prelude for convenient imports
391pub mod prelude {
392    #[cfg(feature = "embedded")]
393    pub use crate::DurableToonClient;
394    pub use crate::path_query::CompareOp;
395    pub use crate::{
396        ClientConfig,
397        ClientError,
398        ClientStats,
399        ClientTransaction,
400        // Connection types
401        Connection,
402        DeleteResult,
403        DurableConnection,
404        InMemoryConnection,
405        InsertResult,
406        IsolationLevel,
407        OutputFormat,
408        PathQuery,
409        ResultMetrics,
410        RowBuilder,
411        SchemaBuilder,
412        SearchResult,
413        SnapshotReader,
414        TableDescription,
415        ToonClient,
416        ToonResult,
417        UpdateResult,
418        VectorCollection,
419    };
420    pub use toondb_core::toon::{ToonType, ToonValue};
421}