1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
//! Tonbo is an embedded database for serverless data-intensive applications.
//!
//! - **Arrow-native schemas** with rich, typed structures
//! - **Stores data as Parquet** directly on object storage (S3, R2) or local filesystem
//! - **Fully asynchronous** and runs in multiple runtimes: browsers, edge functions, or inside
//! other databases
//!
//! No server process to manage. Each database is just a manifest on S3, so adding more is trivial.
//!
//! # Quick Start
//!
//! Add Tonbo to your project:
//!
//! ```bash
//! cargo add tonbo
//! cargo add tokio --features macros,rt-multi-thread
//! ```
//!
//! ## Basic Usage
//!
//! ```rust,no_run
//! use std::sync::Arc;
//!
//! use arrow_array::{Int64Array, RecordBatch, StringArray};
//! use arrow_schema::{DataType, Field, Schema};
//! use tonbo::db::{DbBuilder, Expr, ScalarValue};
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Define schema: User { id: String, name: String, score: i64 }
//! let schema = Arc::new(Schema::new(vec![
//! Field::new("id", DataType::Utf8, false),
//! Field::new("name", DataType::Utf8, false),
//! Field::new("score", DataType::Int64, true),
//! ]));
//!
//! // Open database on local disk
//! let db = DbBuilder::from_schema_key_name(schema.clone(), "id")?
//! .on_disk("/tmp/tonbo_doctest")?
//! .open()
//! .await?;
//!
//! // Insert data as Arrow RecordBatch
//! let batch = RecordBatch::try_new(
//! schema,
//! vec![
//! Arc::new(StringArray::from(vec!["u1", "u2"])),
//! Arc::new(StringArray::from(vec!["Alice", "Bob"])),
//! Arc::new(Int64Array::from(vec![100, 85])),
//! ],
//! )?;
//! db.ingest(batch).await?;
//!
//! // Query: score > 80
//! let filter = Expr::gt("score", ScalarValue::from(80_i64));
//! let results = db.scan().filter(filter).collect().await?;
//!
//! Ok(())
//! }
//! ```
//!
//! For a more ergonomic API, use [`typed_arrow`]'s `#[derive(Record)]` via `tonbo::prelude::*`
//! (`typed-arrow` feature is enabled by default). Mark your primary key field with
//! `#[metadata(k = "tonbo.key", v = "true")]`:
//!
//! ```rust,ignore
//! use tonbo::prelude::*;
//!
//! #[derive(Record)]
//! struct User {
//! #[metadata(k = "tonbo.key", v = "true")]
//! id: String,
//! name: String,
//! score: Option<i64>,
//! }
//!
//! // Key is automatically detected from schema metadata
//! let db = DbBuilder::from_schema(User::schema())?
//! .on_disk("/tmp/users")?
//! .open()
//! .await?;
//!
//! let users = vec![
//! User { id: "u1".into(), name: "Alice".into(), score: Some(100) },
//! ];
//! let mut builders = User::new_builders(users.len());
//! builders.append_rows(users);
//! db.ingest(builders.finish().into_record_batch()).await?;
//! ```
//!
//! ## Using S3 / Object Storage
//!
//! Tonbo stores data as Parquet files on any S3-compatible storage (AWS S3, Cloudflare R2, MinIO):
//!
//! ```rust,ignore
//! use tonbo::db::{AwsCreds, DbBuilder, ObjectSpec, S3Spec};
//!
//! let credentials = AwsCreds::from_env()?;
//! let mut s3_spec = S3Spec::new("my-bucket", "data/users", credentials);
//! s3_spec.region = Some("us-east-1".into());
//!
//! let db = DbBuilder::from_schema_key_name(User::schema(), "id")?
//! .object_store(ObjectSpec::s3(s3_spec))?
//! .open()
//! .await?;
//! ```
//!
//! # Core Concepts
//!
//! ## Schema Definition
//!
//! Use the `#[derive(Record)]` macro from [`typed_arrow`] (available via `tonbo::prelude::*`) to
//! define your schema.
//! Mark primary key fields with `#[metadata(k = "tonbo.key", v = "true")]`:
//!
//! ```rust,ignore
//! use tonbo::prelude::Record;
//!
//! #[derive(Record)]
//! struct Event {
//! #[metadata(k = "tonbo.key", v = "true")]
//! id: String,
//! timestamp: i64,
//! event_type: String,
//! payload: Option<String>, // Nullable field
//! }
//! ```
//!
//! For composite keys, replace `"true"` with each field's ordinal position in the key:
//!
//! ```rust,ignore
//! #[derive(Record)]
//! struct TimeSeries {
//! #[metadata(k = "tonbo.key", v = "0")]
//! device_id: String,
//! #[metadata(k = "tonbo.key", v = "1")]
//! timestamp: i64,
//! value: f64,
//! }
//! ```
//!
//! ## Database Operations
//!
//! - **[`DbBuilder::from_schema`](db::DbBuilder::from_schema)** - Create DB with auto-detected key
//! from metadata
//! - **[`DbBuilder`](db::DbBuilder)** - Configure and open a database
//! - **[`DB`](db::DB)** - The main database handle for reads and writes
//! - **[`DB::ingest`](db::DB::ingest)** - Batch insert records
//! - **[`DB::scan`](db::DB::scan)** - Query with filters and projections
//! - **[`DB::begin_transaction`](db::DB::begin_transaction)** - MVCC transactions with
//! read-your-writes
//!
//! ## Predicates
//!
//! Build query filters using [`Expr`](db::Expr):
//!
//! ```rust,ignore
//! use tonbo::db::{Expr, ScalarValue};
//!
//! // Equality
//! let filter = Expr::eq("status", ScalarValue::from("active"));
//!
//! // Comparison
//! let filter = Expr::gt("age", ScalarValue::from(18_i64));
//!
//! // Logical operators
//! let filter = Expr::and(vec![
//! Expr::gt("age", ScalarValue::from(18_i64)),
//! Expr::eq("country", ScalarValue::from("US")),
//! ]);
//! ```
//!
//! # Feature Flags
//!
//! Tonbo uses feature flags to configure runtime and storage backends:
//!
//! - **`tokio`** *(default)* - Tokio async runtime with local filesystem support
//! - **`typed-arrow`** *(default)* - Provides [`typed_arrow`] derive helpers via `tonbo::prelude`
//! - **`web`** - WebAssembly support for browsers and edge runtimes
//! - **`web-opfs`** - Browser Origin Private File System storage (requires `web`)
//!
//! ## Default Configuration
//!
//! ```toml
//! [dependencies]
//! tonbo = "0.1"
//! ```
//!
//! This includes both `tokio` runtime and `typed-arrow` for schema derivation.
//!
//! ## WebAssembly / Browser
//!
//! ```toml
//! [dependencies]
//! tonbo = { version = "0.1", default-features = false, features = ["web", "typed-arrow"] }
//! ```
//!
//! # Examples
//!
//! Run examples with `cargo run --example <name>`:
//!
//! | Example | Description |
//! |---------|-------------|
//! | `01_basic` | Define schema, insert, and query in 30 lines |
//! | `02_transaction` | MVCC transactions with upsert, delete, read-your-writes |
//! | `02b_snapshot` | Consistent point-in-time reads while writes continue |
//! | `03_filter` | Predicates: eq, gt, in, is_null, and, or, not |
//! | `04_s3` | Store Parquet files on S3/R2/MinIO |
//! | `05_scan_options` | Projection pushdown reads only needed columns |
//! | `06_composite_key` | Multi-column keys for time-series data |
//! | `07_streaming` | Process millions of rows without loading into memory |
//! | `08_nested_types` | Deep struct nesting + Lists as Arrow StructArray |
//! | `09_time_travel` | Query historical snapshots via MVCC timestamps |
//!
//! # Architecture
//!
//! Tonbo implements an LSM-tree style architecture optimized for analytical workloads:
//!
//! 1. **Write Path**: Data is written to an in-memory buffer, then flushed to immutable Parquet
//! files on storage
//! 2. **WAL**: Write-ahead log ensures durability before acknowledgment
//! 3. **Manifest**: Tracks all Parquet files and database state; uses compare-and-swap for
//! coordination on object storage
//! 4. **Compaction**: Background process merges small files into larger ones
//! 5. **MVCC**: Multi-version concurrency control enables snapshot isolation
//!
//! # Platform Support
//!
//! | Platform | Runtime | Storage |
//! |----------|---------|---------|
//! | Linux/macOS/Windows | Tokio | Local filesystem, S3 |
//! | WebAssembly | Browser async | S3, OPFS |
//! | Edge (Deno, Workers) | Platform async | S3 |
pub
/// File and object identifiers.
pub
/// Zero-copy key projection scaffolding and owned key wrapper.
pub
pub
pub
/// Tracing infrastructure for Tonbo observability.
/// Convenience re-exports for common usage.
/// Test helper re-exports for crate-internal tests.
/// Generic DB that dispatches between typed and dynamic modes via generic types.
pub
/// Write-ahead log framework (async, fusio-backed).
pub
/// Manifest integration atop `fusio-manifest`.
pub
/// MVCC primitives shared across modules.
pub
/// Optimistic transaction scaffolding (write path focus for now).
pub
/// On-disk persistence scaffolding (SSTable skeletons).
pub
/// Simple compaction orchestrators.
pub