nodedb 0.3.0

Local-first, real-time, edge-to-cloud hybrid database for multi-modal workloads
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
// SPDX-License-Identifier: BUSL-1.1

//! Per-client shape registry: tracks which shapes each connected client
//! is subscribed to and evaluates mutations against active shapes.

use std::collections::HashMap;
use std::sync::RwLock;
use std::time::Instant;

use tracing::{debug, info, warn};

use nodedb_query::metadata_filter::matches_metadata_filter;
use nodedb_types::filter::MetadataFilter;

use super::definition::{ShapeDefinition, ShapeId, ShapeType};

/// Per-session shape subscription state.
///
/// One value is stored per key of `ShapeRegistry::sessions`, so it is only
/// reachable through the registry's `RwLock`.
struct ClientShapes {
    /// Active shape subscriptions: shape_id → definition.
    shapes: HashMap<ShapeId, ShapeDefinition>,
    /// Tenant ID. Assigned when the session entry is created; nothing in this
    /// file reassigns it afterwards.
    tenant_id: u64,
    /// When shapes were last modified. Read by `ShapeRegistry::compact` to
    /// decide whether the session has been idle for too long.
    last_modified: Instant,
}

/// Registry of all active shape subscriptions across all sync sessions.
///
/// Thread-safe (RwLock): mutations are evaluated against shapes from
/// the WAL tail loop (single writer), while subscribe/unsubscribe
/// come from sync sessions (multiple writers).
///
/// In terms of the lock itself: the query-style methods (`evaluate_mutation`,
/// `evaluate_array_mutation`, `shapes_for_session`, `session_info`,
/// `get_shape`, `export_all`, and the counters) take the read lock, whereas
/// `subscribe`, `unsubscribe`, `remove_session`, `import` and `compact` take
/// the write lock.
pub struct ShapeRegistry {
    /// Per-session shapes: session_id → ClientShapes.
    sessions: RwLock<HashMap<String, ClientShapes>>,
}

impl Default for ShapeRegistry {
    fn default() -> Self {
        Self::new()
    }
}

impl ShapeRegistry {
    /// Create an empty registry with no sessions and no shapes.
    pub fn new() -> Self {
        let sessions = RwLock::new(HashMap::new());
        Self { sessions }
    }

    /// Register a shape subscription for a session.
    ///
    /// The session entry is created on first use. `tenant_id` is only applied
    /// at that moment; an entry that already exists keeps the tenant ID it was
    /// created with. A shape whose ID is already registered for the session is
    /// replaced by the new definition. The session's modification timestamp is
    /// refreshed on every call.
    pub fn subscribe(&self, session_id: &str, tenant_id: u64, shape: ShapeDefinition) {
        let mut guard =
            crate::control::lock_utils::write_or_recover(self.sessions.write(), "shape_sessions");
        let entry = guard
            .entry(session_id.to_string())
            .or_insert_with(|| ClientShapes {
                shapes: HashMap::new(),
                tenant_id,
                last_modified: Instant::now(),
            });
        info!(
            session = session_id,
            shape_id = %shape.shape_id,
            "shape subscribed"
        );
        // Derive the map key from the definition's own ID before moving the
        // definition into the map.
        let key = ShapeId::from_validated(shape.shape_id.clone());
        entry.shapes.insert(key, shape);
        entry.last_modified = Instant::now();
    }

    /// Unsubscribe a shape for a session.
    ///
    /// Returns `true` when the shape was registered and has been removed, and
    /// `false` when either the session or the shape is unknown. The session
    /// entry itself is kept even if this was its last shape.
    pub fn unsubscribe(&self, session_id: &str, shape_id: &str) -> bool {
        let mut guard =
            crate::control::lock_utils::write_or_recover(self.sessions.write(), "shape_sessions");
        let Some(entry) = guard.get_mut(session_id) else {
            return false;
        };
        if entry.shapes.remove(shape_id).is_none() {
            return false;
        }
        debug!(session = session_id, shape_id, "shape unsubscribed");
        entry.last_modified = Instant::now();
        true
    }

    /// Remove all shapes for a session (disconnect cleanup).
    ///
    /// Drops the whole session entry. Unknown sessions are ignored silently.
    pub fn remove_session(&self, session_id: &str) {
        let mut guard =
            crate::control::lock_utils::write_or_recover(self.sessions.write(), "shape_sessions");
        let Some(dropped) = guard.remove(session_id) else {
            return;
        };
        let shape_count = dropped.shapes.len();
        info!(
            session = session_id,
            shapes = shape_count,
            "session shapes removed"
        );
    }

    /// Get all shape IDs for a session.
    ///
    /// Returns an empty vector when the session is unknown. The order of the
    /// IDs follows the underlying hash map and is therefore unspecified.
    pub fn shapes_for_session(&self, session_id: &str) -> Vec<ShapeId> {
        let guard =
            crate::control::lock_utils::read_or_recover(self.sessions.read(), "shape_sessions");
        match guard.get(session_id) {
            Some(entry) => entry.shapes.keys().cloned().collect(),
            None => Vec::new(),
        }
    }

    /// Evaluate a mutation against all active shapes.
    ///
    /// Produces the `(session_id, shape_id)` pairs of every shape that the
    /// mutation should be routed to; the caller is expected to push
    /// ShapeDelta messages to those sessions.
    ///
    /// Matching proceeds in three steps:
    ///
    /// 1. Sessions registered under a different tenant are skipped.
    /// 2. Shapes rejected by `could_match(collection, doc_id)` are skipped.
    /// 3. For `ShapeType::Document` shapes with a non-empty predicate, the
    ///    predicate bytes (MessagePack-encoded `MetadataFilter`) are decoded
    ///    and evaluated against `doc_json`.
    ///
    /// An empty predicate matches every document in the collection, and
    /// shapes of any other type pass once step 2 accepted them. A predicate
    /// that cannot be decoded is logged as a warning and treated as
    /// non-matching (fail-closed).
    pub fn evaluate_mutation(
        &self,
        tenant_id: u64,
        collection: &str,
        doc_id: &str,
        doc_json: &serde_json::Value,
    ) -> Vec<(String, ShapeId)> {
        let guard =
            crate::control::lock_utils::read_or_recover(self.sessions.read(), "shape_sessions");
        let mut routed = Vec::new();

        let tenant_sessions = guard
            .iter()
            .filter(|(_, entry)| entry.tenant_id == tenant_id);

        for (session_id, entry) in tenant_sessions {
            let candidates = entry
                .shapes
                .iter()
                .filter(|(_, shape)| shape.could_match(collection, doc_id));

            for (shape_id, shape) in candidates {
                let accepted = match &shape.shape_type {
                    ShapeType::Document { predicate, .. } if !predicate.is_empty() => {
                        match zerompk::from_msgpack::<MetadataFilter>(predicate) {
                            Ok(filter) => matches_metadata_filter(doc_json, &filter),
                            Err(err) => {
                                warn!(
                                    shape_id = %shape_id,
                                    error = %err,
                                    "failed to decode shape predicate; treating shape as non-matching"
                                );
                                false
                            }
                        }
                    }
                    // Empty predicate or a non-document shape: nothing further to check.
                    _ => true,
                };

                if accepted {
                    routed.push((session_id.clone(), shape_id.clone()));
                }
            }
        }

        routed
    }

    /// Get the tenant ID and the number of registered shapes for a session.
    ///
    /// Returns `Some((tenant_id, shape_count))` for a known session and `None`
    /// otherwise.
    pub fn session_info(&self, session_id: &str) -> Option<(u64, usize)> {
        let guard =
            crate::control::lock_utils::read_or_recover(self.sessions.read(), "shape_sessions");
        let entry = guard.get(session_id)?;
        Some((entry.tenant_id, entry.shapes.len()))
    }

    /// Total active shape subscriptions across all sessions.
    pub fn total_shapes(&self) -> usize {
        let guard =
            crate::control::lock_utils::read_or_recover(self.sessions.read(), "shape_sessions");
        let mut total = 0;
        for entry in guard.values() {
            total += entry.shapes.len();
        }
        total
    }

    /// Number of sessions with at least one active shape.
    ///
    /// A session entry stays in the map after its last shape is unsubscribed
    /// (until `compact` or `remove_session` drops it). Such empty entries have
    /// no active shapes and are therefore not counted.
    pub fn active_sessions(&self) -> usize {
        let sessions =
            crate::control::lock_utils::read_or_recover(self.sessions.read(), "shape_sessions");
        sessions
            .values()
            .filter(|client| !client.shapes.is_empty())
            .count()
    }

    /// Evaluate an array op against all active shapes.
    ///
    /// Produces the `(session_id, shape_id)` pairs of every shape, among the
    /// sessions of `tenant_id`, for which `matches_array_op(array_name, coord)`
    /// holds. The caller then pushes `ArrayDeltaMsg` frames to the matching
    /// sessions.
    ///
    /// This is a sibling of [`Self::evaluate_mutation`] rather than a refactor
    /// so that the existing document/vector matching path is not disturbed.
    pub fn evaluate_array_mutation(
        &self,
        tenant_id: u64,
        array_name: &str,
        coord: &[u64],
    ) -> Vec<(String, ShapeId)> {
        let guard =
            crate::control::lock_utils::read_or_recover(self.sessions.read(), "shape_sessions");
        let mut routed = Vec::new();

        let tenant_sessions = guard
            .iter()
            .filter(|(_, entry)| entry.tenant_id == tenant_id);

        for (session_id, entry) in tenant_sessions {
            routed.extend(
                entry
                    .shapes
                    .iter()
                    .filter(|(_, shape)| shape.matches_array_op(array_name, coord))
                    .map(|(shape_id, _)| (session_id.clone(), shape_id.clone())),
            );
        }

        routed
    }

    /// Get a shape definition by session + shape_id.
    ///
    /// Returns a clone of the stored definition, or `None` when the session or
    /// the shape is unknown.
    pub fn get_shape(&self, session_id: &str, shape_id: &str) -> Option<ShapeDefinition> {
        let guard =
            crate::control::lock_utils::read_or_recover(self.sessions.read(), "shape_sessions");
        let entry = guard.get(session_id)?;
        entry.shapes.get(shape_id).cloned()
    }

    /// Export all shapes for persistence (serializable snapshot).
    ///
    /// Yields one `(session_id, tenant_id, shape)` tuple per registered shape.
    /// Sessions that currently hold no shapes contribute nothing.
    pub fn export_all(&self) -> Vec<(String, u64, ShapeDefinition)> {
        let guard =
            crate::control::lock_utils::read_or_recover(self.sessions.read(), "shape_sessions");
        let mut snapshot = Vec::new();
        for (session_id, entry) in guard.iter() {
            snapshot.extend(
                entry
                    .shapes
                    .values()
                    .map(|shape| (session_id.clone(), entry.tenant_id, shape.clone())),
            );
        }
        snapshot
    }

    /// Import shapes from a persisted snapshot (called on startup).
    ///
    /// Each `(session_id, tenant_id, shape)` tuple is inserted into the shape
    /// map of its session, creating the session entry on first sight. The
    /// tenant ID is only applied when the entry is created; an entry that
    /// already exists keeps its tenant ID. A shape whose ID is already
    /// registered for the session is replaced.
    ///
    /// Every session touched by the import has its modification timestamp
    /// refreshed, exactly as `subscribe` does, so that a later `compact` pass
    /// measures idleness from the import rather than from an older timestamp
    /// on a pre-existing entry.
    pub fn import(&self, shapes: Vec<(String, u64, ShapeDefinition)>) {
        let mut sessions =
            crate::control::lock_utils::write_or_recover(self.sessions.write(), "shape_sessions");
        // One timestamp for the whole batch: all imported shapes arrive together.
        let imported_at = Instant::now();
        for (session_id, tenant_id, shape) in shapes {
            let client = sessions.entry(session_id).or_insert_with(|| ClientShapes {
                shapes: HashMap::new(),
                tenant_id,
                last_modified: imported_at,
            });
            client
                .shapes
                .insert(ShapeId::from_validated(shape.shape_id.clone()), shape);
            // The shape set changed, so the session counts as freshly modified
            // even when its entry existed before this import.
            client.last_modified = imported_at;
        }
    }

    /// Compact stale sessions.
    ///
    /// A session is dropped when it has no shapes, or when its shapes have not
    /// been modified for at least `max_idle`. Returns the number of sessions
    /// that were removed.
    pub fn compact(&self, max_idle: std::time::Duration) -> usize {
        let mut guard =
            crate::control::lock_utils::write_or_recover(self.sessions.write(), "shape_sessions");
        let sessions_before = guard.len();
        guard.retain(|_, entry| {
            if entry.shapes.is_empty() {
                return false;
            }
            entry.last_modified.elapsed() < max_idle
        });
        let sessions_after = guard.len();
        sessions_before - sessions_after
    }
}

#[cfg(test)]
mod tests {
    use super::super::definition::ShapeType;
    use super::*;

    /// Build a tenant-1 document shape over `collection` with an empty
    /// (match-everything) predicate.
    fn make_doc_shape(id: &str, collection: &str) -> ShapeDefinition {
        let shape_type = ShapeType::Document {
            collection: collection.into(),
            predicate: Vec::new(),
        };
        let description = format!("all {collection}");
        ShapeDefinition {
            shape_id: id.into(),
            tenant_id: 1,
            shape_type,
            description,
            field_filter: vec![],
        }
    }

    /// Two shapes on one session are visible through every counter.
    #[test]
    fn subscribe_and_query() {
        let registry = ShapeRegistry::new();
        for (shape_id, collection) in [("sh1", "orders"), ("sh2", "users")] {
            registry.subscribe("s1", 1, make_doc_shape(shape_id, collection));
        }

        let session_shapes = registry.shapes_for_session("s1");

        assert_eq!(registry.total_shapes(), 2);
        assert_eq!(registry.active_sessions(), 1);
        assert_eq!(session_shapes.len(), 2);
    }

    /// A mutation is routed to every same-tenant shape on the collection and
    /// to none belonging to another tenant.
    #[test]
    fn evaluate_mutation_matches() {
        let registry = ShapeRegistry::new();
        // (session, tenant, shape id): s3 is registered under a different tenant.
        let subscriptions = [("s1", 1, "sh1"), ("s2", 1, "sh2"), ("s3", 2, "sh3")];
        for (session, tenant, shape_id) in subscriptions {
            registry.subscribe(session, tenant, make_doc_shape(shape_id, "orders"));
        }

        let order = serde_json::json!({"status": "open"});
        let routed = registry.evaluate_mutation(1, "orders", "o1", &order);

        // s1 and s2 only; s3 is filtered out by tenant.
        assert_eq!(routed.len(), 2);
    }

    /// Unsubscribing succeeds once and reports `false` on a repeat call.
    #[test]
    fn unsubscribe() {
        let registry = ShapeRegistry::new();
        registry.subscribe("s1", 1, make_doc_shape("sh1", "orders"));
        assert_eq!(registry.total_shapes(), 1);

        let first_attempt = registry.unsubscribe("s1", "sh1");
        assert!(first_attempt);
        assert_eq!(registry.total_shapes(), 0);

        // The shape is already gone, so nothing is removed this time.
        let second_attempt = registry.unsubscribe("s1", "sh1");
        assert!(!second_attempt);
    }

    /// Removing a session drops all of its shapes and the session itself.
    #[test]
    fn remove_session() {
        let registry = ShapeRegistry::new();
        for (shape_id, collection) in [("sh1", "orders"), ("sh2", "users")] {
            registry.subscribe("s1", 1, make_doc_shape(shape_id, collection));
        }

        registry.remove_session("s1");

        assert_eq!(registry.total_shapes(), 0);
        assert_eq!(registry.active_sessions(), 0);
    }

    /// A mutation on a collection nobody subscribed to routes nowhere.
    #[test]
    fn no_match_wrong_collection() {
        let registry = ShapeRegistry::new();
        registry.subscribe("s1", 1, make_doc_shape("sh1", "orders"));

        let empty_doc = serde_json::json!({});
        let routed = registry.evaluate_mutation(1, "users", "u1", &empty_doc);

        assert!(routed.is_empty());
    }

    /// Build a tenant-1 document shape over `collection` whose predicate is the
    /// given raw (MessagePack) bytes.
    fn make_doc_shape_with_predicate(
        id: &str,
        collection: &str,
        predicate: Vec<u8>,
    ) -> ShapeDefinition {
        let description = format!("filtered {collection}");
        let shape_type = ShapeType::Document {
            collection: collection.into(),
            predicate,
        };
        ShapeDefinition {
            shape_id: id.into(),
            tenant_id: 1,
            shape_type,
            description,
            field_filter: vec![],
        }
    }

    /// An equality predicate routes only documents whose field has the
    /// expected value.
    #[test]
    fn predicate_eq_routes_matching_doc() {
        use nodedb_types::filter::MetadataFilter;

        let encoded = zerompk::to_msgpack_vec(&MetadataFilter::eq("kind", "Rule")).expect("encode");
        let registry = ShapeRegistry::new();
        let shape = make_doc_shape_with_predicate("sh1", "entries", encoded);
        registry.subscribe("s1", 1, shape);

        let rule_doc = serde_json::json!({"kind": "Rule", "name": "test"});
        let routed = registry.evaluate_mutation(1, "entries", "doc1", &rule_doc);
        assert_eq!(routed.len(), 1, "matching doc must route to shape");

        let note_doc = serde_json::json!({"kind": "Note", "name": "test"});
        let routed = registry.evaluate_mutation(1, "entries", "doc2", &note_doc);
        assert!(routed.is_empty(), "non-matching doc must not route to shape");
    }

    /// A conjunction routes a document only when every clause holds.
    #[test]
    fn predicate_and_routes_correctly() {
        use nodedb_types::filter::MetadataFilter;

        let clauses = vec![
            MetadataFilter::eq("kind", "Rule"),
            MetadataFilter::eq("share", "team"),
        ];
        let encoded = zerompk::to_msgpack_vec(&MetadataFilter::and(clauses)).expect("encode");
        let registry = ShapeRegistry::new();
        let shape = make_doc_shape_with_predicate("sh1", "entries", encoded);
        registry.subscribe("s1", 1, shape);

        // Both clauses satisfied.
        let team_rule = serde_json::json!({"kind": "Rule", "share": "team"});
        let routed_full = registry.evaluate_mutation(1, "entries", "d1", &team_rule);
        assert_eq!(routed_full.len(), 1);

        // Only the first clause satisfied.
        let personal_rule = serde_json::json!({"kind": "Rule", "share": "personal"});
        let routed_partial = registry.evaluate_mutation(1, "entries", "d2", &personal_rule);
        assert!(routed_partial.is_empty());
    }

    /// A shape without a predicate accepts any document in its collection.
    #[test]
    fn empty_predicate_matches_all() {
        let registry = ShapeRegistry::new();
        registry.subscribe("s1", 1, make_doc_shape("sh1", "entries"));

        let arbitrary = serde_json::json!({"anything": "goes"});
        let routed = registry.evaluate_mutation(1, "entries", "d1", &arbitrary);

        assert_eq!(routed.len(), 1);
    }

    /// Undecodable predicate bytes make the shape match nothing.
    #[test]
    fn invalid_predicate_bytes_excluded() {
        // Garbage bytes — cannot decode to MetadataFilter.
        let garbage = vec![0xFF, 0xFE, 0x01, 0x02];
        let shape = make_doc_shape_with_predicate("sh1", "entries", garbage);

        let registry = ShapeRegistry::new();
        registry.subscribe("s1", 1, shape);

        let rule_doc = serde_json::json!({"kind": "Rule"});
        let routed = registry.evaluate_mutation(1, "entries", "d1", &rule_doc);
        assert!(
            routed.is_empty(),
            "bad predicate must be non-matching (fail-closed)"
        );
    }

    /// Build a tenant-1 array shape over `array` with no coordinate range.
    fn make_array_shape(id: &str, array: &str) -> ShapeDefinition {
        let shape_type = ShapeType::Array {
            array_name: array.into(),
            coord_range: None,
        };
        let description = format!("all {array}");
        ShapeDefinition {
            shape_id: id.into(),
            tenant_id: 1,
            shape_type,
            description,
            field_filter: vec![],
        }
    }

    /// An array op is routed to same-tenant shapes on the same array only.
    #[test]
    fn evaluate_array_mutation_matches() {
        let registry = ShapeRegistry::new();
        // (session, tenant, shape id, array): s3 differs by tenant, s4 by array.
        let subscriptions = [
            ("s1", 1, "ah1", "prices"),
            ("s2", 1, "ah2", "prices"),
            ("s3", 2, "ah3", "prices"),
            ("s4", 1, "ah4", "other"),
        ];
        for (session, tenant, shape_id, array) in subscriptions {
            registry.subscribe(session, tenant, make_array_shape(shape_id, array));
        }

        let routed = registry.evaluate_array_mutation(1, "prices", &[5, 5]);

        assert_eq!(routed.len(), 2);
        for expected in ["s1", "s2"] {
            assert!(routed.iter().any(|(session, _)| session.as_str() == expected));
        }
    }

    /// A coordinate range restricts which array ops a shape receives, while a
    /// shape without a range receives all of them.
    #[test]
    fn evaluate_array_mutation_coord_range_filter() {
        use nodedb_types::sync::shape::ArrayCoordRange;

        let bounded_range = ArrayCoordRange {
            start: vec![0],
            end: Some(vec![10]),
        };
        let bounded = ShapeDefinition {
            shape_id: "ar1".into(),
            tenant_id: 1,
            shape_type: ShapeType::Array {
                array_name: "mat".into(),
                coord_range: Some(bounded_range),
            },
            description: "narrow".into(),
            field_filter: vec![],
        };
        let unbounded = make_array_shape("ar2", "mat");

        let registry = ShapeRegistry::new();
        registry.subscribe("s1", 1, bounded);
        registry.subscribe("s2", 1, unbounded);

        // coord = [5] — in range for both.
        let inside = registry.evaluate_array_mutation(1, "mat", &[5]);
        assert_eq!(inside.len(), 2);

        // coord = [50] — only the unbounded shape matches.
        let outside = registry.evaluate_array_mutation(1, "mat", &[50]);
        assert_eq!(outside.len(), 1);
        assert_eq!(outside[0].0, "s2");
    }
}