motedb 0.2.0

AI-native embedded multimodal database for embodied intelligence (robots, AR glasses, industrial arms).
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
//! SQL Fast Path Correctness Tests
//!
//! Tests for the executor fast paths:
//! - MATCH AGAINST (text search)
//! - ST_WITHIN (spatial range)
//! - ST_KNN (spatial nearest neighbor)
//! - ST_DISTANCE ORDER BY (spatial distance sort)
//! - Vector ORDER BY (<->)
//! - Mixed WHERE clauses
//!
//! Run: cargo test --test test_fast_paths -- --test-threads=1

use motedb::Database;
use motedb::types::Value;
use tempfile::TempDir;

/// Builds a brand-new database inside a freshly created temporary directory.
///
/// The `TempDir` is handed back next to the database so the caller can keep
/// it in scope for as long as the database is in use.
///
/// # Panics
/// Panics when either the temporary directory or the database cannot be created.
fn create_db() -> (Database, TempDir) {
    let tmp = TempDir::new().expect("temp dir");
    let database = Database::create(tmp.path()).expect("create db");
    (database, tmp)
}

/// Runs `sql` against `db` and returns the materialized result.
///
/// # Panics
/// Panics with the offending statement and the error if execution fails, and
/// with `"materialize"` if materializing the result fails.
fn exec(db: &Database, sql: &str) -> motedb::sql::QueryResult {
    let pending = match db.execute(sql) {
        Ok(outcome) => outcome,
        Err(e) => panic!("SQL '{sql}': {e}"),
    };
    pending.materialize().expect("materialize")
}

/// Executes `sql` via [`exec`] and returns the row set of a `Select` result.
///
/// Any other kind of result yields an empty vector rather than an error, so a
/// non-SELECT statement is indistinguishable from a SELECT with zero rows.
fn rows(db: &Database, sql: &str) -> Vec<Vec<Value>> {
    if let motedb::sql::QueryResult::Select { rows, .. } = exec(db, sql) {
        rows
    } else {
        Vec::new()
    }
}

/// Creates the `locations` table with a spatial index on `coords` and fills it
/// with a 5×5 grid of points at `(116 + x*0.1, 40 + y*0.1)` for `x, y` in `0..5`.
///
/// Ids run from 1 to 25 in row-major order (`id = x*5 + y + 1`). After the
/// inserts the database is flushed and checkpointed, followed by a 500 ms pause.
fn setup_spatial(db: &Database) {
    exec(db, "CREATE TABLE locations (id INTEGER PRIMARY KEY, name TEXT, coords GEOMETRY)");
    exec(db, "CREATE SPATIAL INDEX locations_coords ON locations(coords)");

    const SIDE: i64 = 5;
    for cell in 0..SIDE * SIDE {
        // Walk the grid as one flat index; x is the slow axis, y the fast one.
        let (x, y) = (cell / SIDE, cell % SIDE);
        let id = cell + 1;
        let point_x = 116.0 + x as f64 * 0.1;
        let point_y = 40.0 + y as f64 * 0.1;
        let insert = format!("INSERT INTO locations VALUES ({id}, 'p_{x}_{y}', POINT({point_x}, {point_y}))");
        exec(db, &insert);
    }

    db.flush().expect("flush");
    db.checkpoint().expect("checkpoint");
    // NOTE(review): presumably gives background index work time to finish — confirm.
    let settle = std::time::Duration::from_millis(500);
    std::thread::sleep(settle);
}

/// Creates the `articles` table with a text index on `body` and inserts eight
/// fixed documents (ids 1..=8).
///
/// Single quotes in titles and bodies are doubled before being spliced into
/// the INSERT statement. After the inserts the database is flushed and
/// checkpointed, followed by a 500 ms pause.
fn setup_text(db: &Database) {
    exec(db, "CREATE TABLE articles (id INTEGER PRIMARY KEY, title TEXT, body TEXT)");
    exec(db, "CREATE TEXT INDEX articles_body ON articles(body)");

    let corpus = [
        (1, "Intro to Rust", "Rust is a systems programming language focused on safety and performance"),
        (2, "Rust Concurrency", "Rust provides fearless concurrency with threads and async"),
        (3, "Python Basics", "Python is a popular programming language for data science"),
        (4, "Database Design", "Database indexing improves query performance significantly"),
        (5, "Vector Search", "Vector databases enable similarity search using embeddings"),
        (6, "Spatial Data", "Spatial indexing with R-trees enables efficient geo queries"),
        (7, "Rust vs C++", "Rust offers memory safety without garbage collection unlike C++"),
        (8, "ML Pipelines", "Machine learning pipelines process data for model training"),
    ];

    // SQL string-literal escaping: every ' becomes ''.
    let quote = |text: &str| text.replace('\'', "''");

    for &(id, title, body) in &corpus {
        let title_sql = quote(title);
        let body_sql = quote(body);
        let insert = format!("INSERT INTO articles VALUES ({id}, '{title_sql}', '{body_sql}')");
        exec(db, &insert);
    }

    db.flush().expect("flush");
    db.checkpoint().expect("checkpoint");
    // NOTE(review): presumably gives background index work time to finish — confirm.
    let settle = std::time::Duration::from_millis(500);
    std::thread::sleep(settle);
}

// ============================================================================
// Spatial Fast Path Tests
// ============================================================================

/// A bounding box covering the whole grid must return every inserted point.
#[test]
fn test_st_within_basic() {
    let (db, _guard) = create_db();
    setup_spatial(&db);

    // The grid spans [116.0, 116.4] × [40.0, 40.4], fully inside the queried box.
    let query = "SELECT * FROM locations WHERE ST_WITHIN(coords, 116.0, 40.0, 117.0, 41.0)";
    let matched = rows(&db, query);
    assert_eq!(matched.len(), 25, "All 25 points should be within the large bbox");
}

/// A small box centred on the grid origin returns a non-empty but small result.
#[test]
fn test_st_within_narrow_bbox() {
    let (db, _guard) = create_db();
    setup_spatial(&db);

    // ±0.05 around (116.0, 40.0); the grid spacing is 0.1, so few points can fall inside.
    let query = "SELECT * FROM locations WHERE ST_WITHIN(coords, 115.95, 39.95, 116.05, 40.05)";
    let matched = rows(&db, query);
    let count = matched.len();

    assert!(!matched.is_empty(), "Should find at least the origin point");
    assert!(count <= 4, "Narrow bbox should match few points");
}

/// A bounding box far away from the grid must return no rows.
#[test]
fn test_st_within_no_results() {
    let (db, _guard) = create_db();
    setup_spatial(&db);

    // The box [0, 1] × [0, 1] does not overlap the grid at all.
    let query = "SELECT * FROM locations WHERE ST_WITHIN(coords, 0.0, 0.0, 1.0, 1.0)";
    let matched = rows(&db, query);
    assert!(matched.is_empty(), "No points should be in Africa");
}

/// `ST_KNN` with k = 3 at the grid origin returns exactly three rows.
#[test]
fn test_st_knn_basic() {
    let (db, _guard) = create_db();
    setup_spatial(&db);

    let query = "SELECT * FROM locations WHERE ST_KNN(coords, 116.0, 40.0, 3)";
    let neighbours = rows(&db, query);
    assert_eq!(neighbours.len(), 3, "KNN should return exactly 3 results");
}

/// `ST_KNN` with k larger than the table size (100 vs. 25 rows) still returns
/// most of the data.
#[test]
fn test_st_knn_k_larger_than_data() {
    let (db, _guard) = create_db();
    setup_spatial(&db);

    let query = "SELECT * FROM locations WHERE ST_KNN(coords, 116.0, 40.0, 100)";
    let neighbours = rows(&db, query);

    // The check is deliberately loose: at least 20 of the 25 points are required.
    let returned = neighbours.len();
    assert!(returned >= 20, "KNN with k > data should return most points");
}

/// `ORDER BY` on an aliased `ST_DISTANCE` expression with `LIMIT 5` returns
/// five rows whose distances are non-decreasing (within a 0.01 tolerance).
///
/// The `dist` column (index 2) must hold `Value::Float` in every row; any
/// other value fails the test with an explicit message instead of being
/// replaced by a sentinel that would surface as a misleading ordering error.
#[test]
fn test_st_distance_order_by() {
    let (db, _dir) = create_db();
    setup_spatial(&db);

    let result = rows(&db,
        "SELECT id, name, ST_DISTANCE(coords, 116.0, 40.0) AS dist FROM locations ORDER BY dist LIMIT 5");
    assert_eq!(result.len(), 5, "Should return top 5 results");

    // Pull the distance out of every row up front so a wrong column type is
    // reported as such, with the row index and the offending value.
    let distances: Vec<f64> = result
        .iter()
        .enumerate()
        .map(|(idx, row)| match &row[2] {
            Value::Float(d) => *d,
            other => panic!("Row {} dist column should be a Float, got {:?}", idx, other),
        })
        .collect();

    // Distances should be ascending (or non-decreasing)
    for pair in distances.windows(2) {
        let (d_prev, d_curr) = (pair[0], pair[1]);
        assert!(d_curr >= d_prev - 0.01, "Distances should be ascending: {} vs {}", d_prev, d_curr);
    }
}

/// `ST_KNN` with k = 3 around the grid centre (116.2, 40.2) returns three rows.
///
/// Only the row count is checked; which points come back is not inspected.
#[test]
fn test_st_knn_returns_nearby() {
    let (db, _guard) = create_db();
    setup_spatial(&db);

    let query = "SELECT * FROM locations WHERE ST_KNN(coords, 116.2, 40.2, 3)";
    let neighbours = rows(&db, query);
    assert_eq!(neighbours.len(), 3, "KNN should return 3 results");
}

// ============================================================================
// Text Search Fast Path Tests
// ============================================================================

/// A two-term `MATCH ... AGAINST` query finds the Rust articles with ids 1 and 2.
#[test]
fn test_match_against_basic() {
    let (db, _guard) = create_db();
    setup_text(&db);

    let query = "SELECT id, title FROM articles WHERE MATCH(body) AGAINST('Rust programming') ORDER BY id";
    let hits = rows(&db, query);
    assert!(!hits.is_empty(), "Should find Rust-related articles");

    // Gather the integer ids from column 0; rows with any other value type are ignored.
    let mut found_ids: Vec<i64> = Vec::new();
    for row in &hits {
        if let Value::Integer(id) = &row[0] {
            found_ids.push(*id);
        }
    }

    assert!(found_ids.contains(&1), "Should find 'Intro to Rust'");
    assert!(found_ids.contains(&2), "Should find 'Rust Concurrency'");
}

/// Projecting `MATCH ... AGAINST` as a `score` column yields positive scores.
///
/// NOTE(review): rows whose score is not a `Value::Float` are skipped rather
/// than failed, so the positivity check is vacuous for them.
#[test]
fn test_match_against_with_score() {
    let (db, _guard) = create_db();
    setup_text(&db);

    let query = "SELECT id, MATCH(body) AGAINST('Rust') AS score FROM articles WHERE MATCH(body) AGAINST('Rust') ORDER BY score DESC LIMIT 5";
    let hits = rows(&db, query);

    assert!(!hits.is_empty(), "Should find results");

    // Lazily pick out the float scores from column 1 and check each one.
    let float_scores = hits.iter().filter_map(|row| match row[1] {
        Value::Float(score) => Some(score),
        _ => None,
    });
    for score in float_scores {
        assert!(score > 0.0, "Score should be positive, got {}", score);
    }
}

/// A term that occurs in no article body produces an empty result.
#[test]
fn test_match_against_no_results() {
    let (db, _guard) = create_db();
    setup_text(&db);

    let query = "SELECT id FROM articles WHERE MATCH(body) AGAINST('xyznonexistent')";
    let hits = rows(&db, query);
    assert!(hits.is_empty(), "Should find nothing for nonsense query");
}

/// A single lowercase term (`spatial`) returns at least one row; article 6's
/// body starts with "Spatial".
#[test]
fn test_match_against_single_term() {
    let (db, _guard) = create_db();
    setup_text(&db);

    let query = "SELECT id FROM articles WHERE MATCH(body) AGAINST('spatial') LIMIT 5";
    let hits = rows(&db, query);
    assert!(!hits.is_empty(), "Should find 'spatial' in article 6");
}

/// `LIMIT 2` caps the number of rows returned by a text search.
#[test]
fn test_match_against_limit() {
    let (db, _guard) = create_db();
    setup_text(&db);

    let query = "SELECT id FROM articles WHERE MATCH(body) AGAINST('database') LIMIT 2";
    let hits = rows(&db, query);

    // Only the upper bound is checked; an empty result also passes.
    let returned = hits.len();
    assert!(returned <= 2, "Should respect LIMIT");
}

/// A double-quoted phrase query matches only the article containing that
/// exact word sequence (article 8, "Machine learning pipelines ...").
#[test]
fn test_match_against_phrase() {
    let (db, _guard) = create_db();
    setup_text(&db);

    // The phrase is wrapped in double quotes inside the SQL string literal.
    let query = "SELECT id FROM articles WHERE MATCH(body) AGAINST('\"machine learning\"')";
    let hits = rows(&db, query);

    assert_eq!(hits.len(), 1, "Phrase 'machine learning' should match exactly 1 article");
    let first_id = &hits[0][0];
    assert_eq!(*first_id, Value::Integer(8));
}

/// A phrase whose words appear only in the opposite order must not match.
#[test]
fn test_match_against_phrase_no_match() {
    let (db, _guard) = create_db();
    setup_text(&db);

    // No article body contains "learning machine"; article 8 has the words reversed.
    let query = "SELECT id FROM articles WHERE MATCH(body) AGAINST('\"learning machine\"')";
    let hits = rows(&db, query);
    assert!(hits.is_empty(), "Phrase 'learning machine' should not match (wrong order)");
}

// ============================================================================
// Vector Fast Path Tests
// ============================================================================

/// `ORDER BY emb <-> [..] LIMIT 3` returns three rows, nearest first.
///
/// Ten 4-dimensional vectors `[i, i, i, i]` (i = 1..=10) are inserted, so the
/// query vector `[5, 5, 5, 5]` coincides with the row whose id is 5.
#[test]
fn test_vector_order_by_returns_results() {
    let (db, _guard) = create_db();

    exec(&db, "CREATE TABLE items (id INTEGER PRIMARY KEY, name TEXT, emb VECTOR(4))");
    exec(&db, "CREATE VECTOR INDEX items_emb ON items(emb)");

    for i in 1..=10i64 {
        // Every component equals the row id, printed with one decimal place.
        let c = i as f64;
        let literal = format!("[{c:.1}, {c:.1}, {c:.1}, {c:.1}]");
        let insert = format!("INSERT INTO items VALUES ({i}, 'item_{i}', {literal})");
        exec(&db, &insert);
    }

    db.flush().expect("flush");
    db.checkpoint().expect("checkpoint");
    let settle = std::time::Duration::from_millis(500);
    std::thread::sleep(settle);

    let query = "SELECT id, name FROM items ORDER BY emb <-> [5.0, 5.0, 5.0, 5.0] LIMIT 3";
    let nearest = rows(&db, query);
    assert_eq!(nearest.len(), 3, "Should return top 3");

    // The exact match (distance 0) has to come first.
    let top_id = &nearest[0][0];
    assert_eq!(*top_id, Value::Integer(5), "Closest should be id=5");
}

/// Projecting `v <-> [..]` as `dist` and ordering by it returns five rows
/// with non-negative distances.
///
/// NOTE(review): rows whose `dist` is not a `Value::Float` are skipped rather
/// than failed, so the sign check is vacuous for them.
#[test]
fn test_vector_order_by_with_distance() {
    let (db, _guard) = create_db();

    exec(&db, "CREATE TABLE vecs (id INTEGER PRIMARY KEY, v VECTOR(4))");
    exec(&db, "CREATE VECTOR INDEX vecs_v ON vecs(v)");

    for i in 1..=20i64 {
        // Every component equals the row id, printed with one decimal place.
        let c = i as f64;
        let literal = format!("[{c:.1}, {c:.1}, {c:.1}, {c:.1}]");
        let insert = format!("INSERT INTO vecs VALUES ({i}, {literal})");
        exec(&db, &insert);
    }

    db.flush().expect("flush");
    db.checkpoint().expect("checkpoint");
    let settle = std::time::Duration::from_millis(500);
    std::thread::sleep(settle);

    let query = "SELECT id, v <-> [10.0, 10.0, 10.0, 10.0] AS dist FROM vecs ORDER BY dist LIMIT 5";
    let nearest = rows(&db, query);
    assert_eq!(nearest.len(), 5, "Should return top 5");

    // Lazily pick out the float distances from column 1 and check each one.
    let float_distances = nearest.iter().filter_map(|row| match row[1] {
        Value::Float(d) => Some(d),
        _ => None,
    });
    for d in float_distances {
        assert!(d >= 0.0, "Distance should be non-negative");
    }
}

// ============================================================================
// Mixed / Complex Queries
// ============================================================================

/// `SELECT *` combined with `ST_WITHIN` returns rows that carry at least two columns.
#[test]
fn test_select_star_with_st_within() {
    let (db, _guard) = create_db();
    setup_spatial(&db);

    let query = "SELECT * FROM locations WHERE ST_WITHIN(coords, 116.0, 40.0, 116.15, 40.15)";
    let matched = rows(&db, query);
    assert!(!matched.is_empty(), "SELECT * should work with ST_WITHIN");

    // Only a lower bound on the column count is enforced.
    matched.iter().for_each(|row| {
        assert!(row.len() >= 2, "Row should have at least id and name");
    });
}

/// Ordering directly by an `ST_DISTANCE` expression with `LIMIT 1` returns one row.
///
/// Only the row count is checked; the id of the returned row is not inspected.
#[test]
fn test_st_distance_order_by_with_limit_1() {
    let (db, _guard) = create_db();
    setup_spatial(&db);

    let query = "SELECT id FROM locations ORDER BY ST_DISTANCE(coords, 116.0, 40.0) LIMIT 1";
    let nearest = rows(&db, query);
    assert_eq!(nearest.len(), 1, "Should return 1 result");
}

/// A text search that selects only `title` returns single-column rows.
#[test]
fn test_match_against_select_specific_columns() {
    let (db, _guard) = create_db();
    setup_text(&db);

    let query = "SELECT title FROM articles WHERE MATCH(body) AGAINST('vector search')";
    let hits = rows(&db, query);
    assert!(!hits.is_empty(), "Should find vector search articles");

    // Projection check: exactly the one requested column per row.
    hits.iter().for_each(|row| {
        assert_eq!(row.len(), 1, "Should only project requested column");
    });
}

/// `COUNT(*)` over the spatially indexed `locations` table reports all 25 rows.
///
/// NOTE(review): despite the test name, the query carries no WHERE clause.
#[test]
fn test_count_with_indexed_where() {
    let (db, _guard) = create_db();
    setup_spatial(&db);

    let query = "SELECT COUNT(*) as cnt FROM locations";
    let counted = rows(&db, query);

    // The aggregate is read from the first column of the first row.
    let total = &counted[0][0];
    assert_eq!(*total, Value::Integer(25), "Should count all 25 locations");
}

// ============================================================================
// Persistence: data survives flush+checkpoint
// ============================================================================

/// Rows written to a spatially indexed table are still present after the
/// database handle is dropped and the same path is opened again.
///
/// NOTE(review): the post-reopen check is a plain `SELECT *`; no spatial
/// predicate is exercised.
#[test]
fn test_spatial_query_after_reopen() {
    let tmp = TempDir::new().expect("temp dir");
    let db_path = tmp.path().to_path_buf();

    // Phase 1: create, populate, persist, then release the handle.
    let writer = Database::create(&db_path).expect("create db");
    exec(&writer, "CREATE TABLE pts (id INTEGER PRIMARY KEY, loc GEOMETRY)");
    exec(&writer, "CREATE SPATIAL INDEX pts_loc ON pts(loc)");
    for i in 1..=5i64 {
        let x = 116.0 + i as f64 * 0.1;
        let y = 39.9;
        let insert = format!("INSERT INTO pts VALUES ({i}, POINT({x}, {y}))");
        exec(&writer, &insert);
    }
    writer.flush().expect("flush");
    writer.checkpoint().expect("checkpoint");
    // The first handle must be gone before the path is opened a second time.
    drop(writer);

    // Phase 2: reopen the same path and read everything back.
    {
        let reader = Database::open(&db_path).expect("reopen db");
        let survivors = rows(&reader, "SELECT * FROM pts");
        assert_eq!(survivors.len(), 5, "All rows should survive reopen");
    }
}

/// Text search keeps working across a flush + checkpoint cycle.
///
/// `text_search_ranked` is called once before and once after the flush; only
/// the success of those calls is verified here (their results are not
/// inspected). The final SQL `MATCH ... AGAINST` query must return at least
/// one row.
#[test]
fn test_text_search_after_insert() {
    let (db, _dir) = create_db();

    exec(&db, "CREATE TABLE docs (id INTEGER PRIMARY KEY, body TEXT)");
    exec(&db, "CREATE TEXT INDEX docs_body ON docs(body)");
    exec(&db, "INSERT INTO docs VALUES (1, 'hello world database')");
    exec(&db, "INSERT INTO docs VALUES (2, 'vector search engine')");

    // Before flush: the ranked search call itself must succeed. The result is
    // intentionally unused, hence the underscore-prefixed binding.
    let _pre_flush = db
        .text_search_ranked("docs_body", "database", 10)
        .expect("ranked text search before flush");

    db.flush().expect("flush");
    db.checkpoint().expect("checkpoint");
    std::thread::sleep(std::time::Duration::from_millis(500));

    // After flush+checkpoint: the ranked search call must still succeed.
    let _post_flush = db
        .text_search_ranked("docs_body", "database", 10)
        .expect("ranked text search after flush");

    let result = rows(&db, "SELECT id FROM docs WHERE MATCH(body) AGAINST('database')");
    assert!(!result.is_empty(), "Text search should find 'database'");
}