Skip to main content

mangle_common/
lib.rs

1// Copyright 2024 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! # Mangle FactStore
16//!
17//! Defines core storage interfaces (`Store`, `Host`) and a legacy in-memory storage implementation.
18
19use anyhow::{Result, anyhow};
20use ast::Arena;
21use mangle_ast as ast;
22
23mod tablestore;
24pub use tablestore::{TableConfig, TableStoreImpl, TableStoreSchema};
25
26// --- New Interfaces (Moved from interpreter/vm to break cycles) ---
27
28/// Identifies the kind of a compound value. When type information is available
29/// (concrete types), the kind is redundant. For `/any` or union-typed columns
30/// the kind tag makes the value self-describing.
31#[cfg(feature = "edge")]
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
33pub enum CompoundKind {
34    List,
35    Pair,
36    Map,
37    Struct,
38}
39
40#[cfg(feature = "edge")]
41#[derive(Debug, Clone)]
42pub enum Value {
43    Number(i64),
44    Float(f64),
45    String(String),
46    /// A Mangle name constant like `/foo/bar`. Distinct from `String` at runtime
47    /// so that `/any`-typed columns can distinguish names from string values.
48    Name(String),
49    /// Time as nanoseconds since Unix epoch (consistent with Go implementation).
50    Time(i64),
51    /// Duration as nanoseconds (consistent with Go implementation).
52    Duration(i64),
53    /// A compound value: a flat sequence of values. The `CompoundKind` tag
54    /// identifies the interpretation. For lists and pairs, elements are stored
55    /// directly. For structs and maps, keys/field-names and values are
56    /// interleaved: [k1, v1, k2, v2, ...].
57    Compound(CompoundKind, Vec<Value>),
58    Null, // Used for iteration end or missing
59}
60
61#[cfg(feature = "edge")]
62impl PartialEq for Value {
63    fn eq(&self, other: &Self) -> bool {
64        match (self, other) {
65            (Value::Number(a), Value::Number(b)) => a == b,
66            (Value::Float(a), Value::Float(b)) => a.to_bits() == b.to_bits(),
67            (Value::String(a), Value::String(b)) => a == b,
68            (Value::Name(a), Value::Name(b)) => a == b,
69            (Value::Time(a), Value::Time(b)) => a == b,
70            (Value::Duration(a), Value::Duration(b)) => a == b,
71            (Value::Compound(ka, a), Value::Compound(kb, b)) => ka == kb && a == b,
72            (Value::Null, Value::Null) => true,
73            _ => false,
74        }
75    }
76}
77
78#[cfg(feature = "edge")]
79impl Eq for Value {}
80
81#[cfg(feature = "edge")]
82impl std::hash::Hash for Value {
83    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
84        std::mem::discriminant(self).hash(state);
85        match self {
86            Value::Number(n) => n.hash(state),
87            Value::Float(f) => f.to_bits().hash(state),
88            Value::String(s) => s.hash(state),
89            Value::Name(s) => s.hash(state),
90            Value::Time(t) => t.hash(state),
91            Value::Duration(d) => d.hash(state),
92            Value::Compound(k, v) => {
93                k.hash(state);
94                v.hash(state);
95            }
96            Value::Null => {}
97        }
98    }
99}
100
101#[cfg(feature = "edge")]
102impl PartialOrd for Value {
103    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
104        Some(self.cmp(other))
105    }
106}
107
108#[cfg(feature = "edge")]
109impl Ord for Value {
110    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
111        match (self, other) {
112            (Value::Number(a), Value::Number(b)) => a.cmp(b),
113            (Value::Float(a), Value::Float(b)) => a.total_cmp(b),
114            // Cross-numeric: promote integer to float for comparison
115            (Value::Number(a), Value::Float(b)) => (*a as f64).total_cmp(b),
116            (Value::Float(a), Value::Number(b)) => a.total_cmp(&(*b as f64)),
117            (Value::String(a), Value::String(b)) => a.cmp(b),
118            (Value::Name(a), Value::Name(b)) => a.cmp(b),
119            (Value::Time(a), Value::Time(b)) => a.cmp(b),
120            (Value::Duration(a), Value::Duration(b)) => a.cmp(b),
121            // Cross-type: Duration vs Number compare as i64 nanoseconds
122            (Value::Duration(a), Value::Number(b)) => a.cmp(b),
123            (Value::Number(a), Value::Duration(b)) => a.cmp(b),
124            // Cross-type: Time vs Time already handled; Time vs Number
125            (Value::Time(a), Value::Number(b)) => a.cmp(b),
126            (Value::Number(a), Value::Time(b)) => a.cmp(b),
127            (Value::Compound(ka, a), Value::Compound(kb, b)) => ka.cmp(kb).then_with(|| a.cmp(b)),
128            (Value::Null, Value::Null) => std::cmp::Ordering::Equal,
129            // Cross-variant ordering: Number/Float < String < Name < Time < Duration < Compound < Null
130            (Value::Number(_) | Value::Float(_), _) => std::cmp::Ordering::Less,
131            (_, Value::Number(_) | Value::Float(_)) => std::cmp::Ordering::Greater,
132            (Value::String(_), _) => std::cmp::Ordering::Less,
133            (_, Value::String(_)) => std::cmp::Ordering::Greater,
134            (Value::Name(_), _) => std::cmp::Ordering::Less,
135            (_, Value::Name(_)) => std::cmp::Ordering::Greater,
136            (Value::Time(_), _) => std::cmp::Ordering::Less,
137            (_, Value::Time(_)) => std::cmp::Ordering::Greater,
138            (Value::Duration(_), _) => std::cmp::Ordering::Less,
139            (_, Value::Duration(_)) => std::cmp::Ordering::Greater,
140            (Value::Compound(..), _) => std::cmp::Ordering::Less,
141            (_, Value::Compound(..)) => std::cmp::Ordering::Greater,
142        }
143    }
144}
145
146#[cfg(feature = "edge")]
147impl std::fmt::Display for Value {
148    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149        match self {
150            Value::Number(n) => write!(f, "{n}"),
151            Value::Float(v) => write!(f, "{v}"),
152            Value::String(s) => write!(f, "{s:?}"),
153            Value::Name(s) => write!(f, "{s}"),
154            Value::Time(nanos) => write!(f, "{}", format_time_nanos(*nanos)),
155            Value::Duration(nanos) => write!(f, "{}", format_duration_nanos(*nanos)),
156            Value::Compound(kind, elems) => match kind {
157                CompoundKind::List | CompoundKind::Pair => {
158                    write!(f, "[")?;
159                    for (i, e) in elems.iter().enumerate() {
160                        if i > 0 {
161                            write!(f, ", ")?;
162                        }
163                        write!(f, "{e}")?;
164                    }
165                    write!(f, "]")
166                }
167                CompoundKind::Map => {
168                    write!(f, "[")?;
169                    for (i, pair) in elems.chunks_exact(2).enumerate() {
170                        if i > 0 {
171                            write!(f, ", ")?;
172                        }
173                        write!(f, "{}: {}", pair[0], pair[1])?;
174                    }
175                    write!(f, "]")
176                }
177                CompoundKind::Struct => {
178                    write!(f, "{{")?;
179                    for (i, pair) in elems.chunks_exact(2).enumerate() {
180                        if i > 0 {
181                            write!(f, ", ")?;
182                        }
183                        write!(f, "{}: {}", pair[0], pair[1])?;
184                    }
185                    write!(f, "}}")
186                }
187            },
188            Value::Null => write!(f, "null"),
189        }
190    }
191}
192
193#[cfg(feature = "edge")]
194fn format_time_nanos(nanos: i64) -> String {
195    let secs = nanos.div_euclid(1_000_000_000);
196    let ns = nanos.rem_euclid(1_000_000_000) as u32;
197
198    // Convert seconds since epoch to date/time components
199    // Using a simplified algorithm (valid for dates from 1970 onwards)
200    let days = secs.div_euclid(86400);
201    let time_of_day = secs.rem_euclid(86400);
202    let hour = time_of_day / 3600;
203    let minute = (time_of_day % 3600) / 60;
204    let second = time_of_day % 60;
205
206    // Civil date from days since epoch (algorithm from Howard Hinnant)
207    let z = days + 719468;
208    let era = (if z >= 0 { z } else { z - 146096 }) / 146097;
209    let doe = (z - era * 146097) as u32;
210    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;
211    let y = yoe as i64 + era * 400;
212    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
213    let mp = (5 * doy + 2) / 153;
214    let d = doy - (153 * mp + 2) / 5 + 1;
215    let m = if mp < 10 { mp + 3 } else { mp - 9 };
216    let y = if m <= 2 { y + 1 } else { y };
217
218    if ns == 0 {
219        format!("{y:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}Z")
220    } else {
221        // Trim trailing zeros from fractional seconds
222        let mut frac = format!("{ns:09}");
223        frac = frac.trim_end_matches('0').to_string();
224        format!("{y:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}.{frac}Z")
225    }
226}
227
228/// Formats a duration in nanoseconds to match Go's time.Duration.String() output.
229/// Produces compound forms like "1h30m0s", "2.5s", "500ms", "150ns".
230#[cfg(feature = "edge")]
231fn format_duration_nanos(nanos: i64) -> String {
232    if nanos == 0 {
233        return "0s".to_string();
234    }
235
236    let mut result = String::new();
237    let mut remaining = if nanos < 0 {
238        result.push('-');
239        nanos.unsigned_abs()
240    } else {
241        nanos as u64
242    };
243
244    const NANOS_PER_US: u64 = 1_000;
245    const NANOS_PER_MS: u64 = 1_000_000;
246    const NANOS_PER_S: u64 = 1_000_000_000;
247    const NANOS_PER_M: u64 = 60 * NANOS_PER_S;
248    const NANOS_PER_H: u64 = 60 * NANOS_PER_M;
249
250    // For durations >= 1s, Go uses compound h/m/s format
251    if remaining >= NANOS_PER_S {
252        let hours = remaining / NANOS_PER_H;
253        remaining %= NANOS_PER_H;
254        let minutes = remaining / NANOS_PER_M;
255        remaining %= NANOS_PER_M;
256        let seconds = remaining / NANOS_PER_S;
257        let sub_second_nanos = remaining % NANOS_PER_S;
258
259        if hours > 0 {
260            result.push_str(&format!("{hours}h"));
261        }
262        if minutes > 0 || hours > 0 {
263            result.push_str(&format!("{minutes}m"));
264        }
265        if sub_second_nanos == 0 {
266            result.push_str(&format!("{seconds}s"));
267        } else {
268            // Format fractional seconds, trimming trailing zeros
269            let frac = format!("{sub_second_nanos:09}");
270            let frac = frac.trim_end_matches('0');
271            result.push_str(&format!("{seconds}.{frac}s"));
272        }
273    } else if remaining >= NANOS_PER_MS {
274        // Milliseconds with optional fractional part
275        let ms = remaining / NANOS_PER_MS;
276        let sub = remaining % NANOS_PER_MS;
277        if sub == 0 {
278            result.push_str(&format!("{ms}ms"));
279        } else {
280            let frac = format!("{sub:06}");
281            let frac = frac.trim_end_matches('0');
282            result.push_str(&format!("{ms}.{frac}ms"));
283        }
284    } else if remaining >= NANOS_PER_US {
285        // Microseconds
286        let us = remaining / NANOS_PER_US;
287        let sub = remaining % NANOS_PER_US;
288        if sub == 0 {
289            result.push_str(&format!("{us}µs"));
290        } else {
291            let frac = format!("{sub:03}");
292            let frac = frac.trim_end_matches('0');
293            result.push_str(&format!("{us}.{frac}µs"));
294        }
295    } else {
296        // Nanoseconds
297        result.push_str(&format!("{}ns", remaining));
298    }
299
300    result
301}
302
303/// Abstract interface for relation storage (Edge Mode).
304///
305/// Tuples are currently stored as `Vec<Value>` where compound values
306/// (lists, structs, maps) appear as `Value::Compound(...)`.
307///
308/// TODO: Support explicit table flattening. When a user annotates a type
309/// declaration, compound columns should be inlined into the tuple as
310/// length-prefixed sequences of scalar values. This flattening should
311/// only apply to explicitly requested levels of the type tree (no
312/// automatic recursive flattening). The Store would then see wider
313/// tuples of scalar values instead of Compound entries.
314#[cfg(feature = "edge")]
315pub trait Store {
316    /// Returns an iterator over all tuples in the relation.
317    /// Returns an error if the relation does not exist.
318    fn scan(&self, relation: &str) -> Result<Box<dyn Iterator<Item = Vec<Value>> + '_>>;
319
320    /// Returns an iterator over only the new tuples added in the last iteration.
321    fn scan_delta(&self, relation: &str) -> Result<Box<dyn Iterator<Item = Vec<Value>> + '_>>;
322
323    /// Returns an iterator over tuples being collected for the next iteration.
324    fn scan_next_delta(&self, relation: &str) -> Result<Box<dyn Iterator<Item = Vec<Value>> + '_>>;
325
326    /// Returns an iterator over tuples in the relation matching a key in a column.
327    fn scan_index(
328        &self,
329        relation: &str,
330        col_idx: usize,
331        key: &Value,
332    ) -> Result<Box<dyn Iterator<Item = Vec<Value>> + '_>>;
333
334    /// Returns an iterator over delta tuples matching a key in a column.
335    fn scan_delta_index(
336        &self,
337        relation: &str,
338        col_idx: usize,
339        key: &Value,
340    ) -> Result<Box<dyn Iterator<Item = Vec<Value>> + '_>>;
341
342    /// Inserts a tuple into the relation (specifically into the delta/new set).
343    /// Returns true if it was new.
344    fn insert(&mut self, relation: &str, tuple: Vec<Value>) -> Result<bool>;
345
346    /// Merges current deltas into the stable set of facts.
347    fn merge_deltas(&mut self);
348
349    /// Ensures a relation exists in the store.
350    fn create_relation(&mut self, relation: &str);
351
352    /// Removes a specific tuple from the relation's stable set.
353    /// Returns true if the tuple was found and removed.
354    fn retract(&mut self, relation: &str, tuple: &[Value]) -> Result<bool>;
355
356    /// Removes all tuples from a relation (stable, delta, and next_delta).
357    fn clear(&mut self, relation: &str);
358
359    /// Returns the names of all relations in the store.
360    fn relation_names(&self) -> Vec<String>;
361
362    /// Coalesce temporal intervals for a relation.
363    /// Groups facts by their non-temporal columns (all except the last 2),
364    /// sorts intervals by start time, and merges overlapping/adjacent intervals.
365    /// Default implementation is a no-op.
366    fn coalesce_temporal(&mut self, _relation: &str) {}
367}
368
369/// Opaque handle to a value in the host's value store.
370/// In WASM, these are represented as `externref`.
371#[cfg(feature = "server")]
372#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
373pub struct HostVal(pub u32);
374
375/// Trait for the host environment that provides storage and data access (Server Mode).
376///
377/// All Mangle values (numbers, floats, strings, compounds) are represented as
378/// opaque `HostVal` handles. In WASM, these map to `externref` — the WASM module
379/// never inspects values directly; all operations go through host calls.
380#[cfg(feature = "server")]
381pub trait Host {
382    // --- Iterator/scan (control flow) ---
383    fn scan_start(&mut self, rel_id: i32) -> i32;
384    fn scan_delta_start(&mut self, rel_id: i32) -> i32;
385    fn scan_next(&mut self, iter_id: i32) -> i32;
386    /// Merges deltas and returns 1 if changes occurred, 0 otherwise.
387    fn merge_deltas(&mut self) -> i32;
388    fn scan_aggregate_start(&mut self, rel_id: i32, description: Vec<i32>) -> i32;
389    fn scan_index_start(&mut self, rel_id: i32, col_idx: i32, val: HostVal) -> i32;
390
391    // --- Value access ---
392    fn get_col(&mut self, tuple_ptr: i32, col_idx: i32) -> HostVal;
393
394    // --- Multi-column insertion ---
395    fn insert_begin(&mut self, rel_id: i32);
396    fn insert_push(&mut self, val: HostVal);
397    fn insert_end(&mut self);
398
399    // --- Constants ---
400    fn const_number(&mut self, n: i64) -> HostVal;
401    fn const_float(&mut self, bits: i64) -> HostVal;
402    fn const_string(&mut self, id: i32) -> HostVal;
403    fn const_name(&mut self, id: i32) -> HostVal;
404    fn const_time(&mut self, nanos: i64) -> HostVal;
405    fn const_duration(&mut self, nanos: i64) -> HostVal;
406
407    // --- Arithmetic (handles int/float promotion internally) ---
408    fn val_add(&mut self, a: HostVal, b: HostVal) -> HostVal;
409    fn val_sub(&mut self, a: HostVal, b: HostVal) -> HostVal;
410    fn val_mul(&mut self, a: HostVal, b: HostVal) -> HostVal;
411    fn val_div(&mut self, a: HostVal, b: HostVal) -> HostVal;
412    fn val_sqrt(&mut self, a: HostVal) -> HostVal;
413
414    // --- Comparisons (return 0 or 1) ---
415    fn val_eq(&mut self, a: HostVal, b: HostVal) -> i32;
416    fn val_neq(&mut self, a: HostVal, b: HostVal) -> i32;
417    fn val_lt(&mut self, a: HostVal, b: HostVal) -> i32;
418    fn val_le(&mut self, a: HostVal, b: HostVal) -> i32;
419    fn val_gt(&mut self, a: HostVal, b: HostVal) -> i32;
420    fn val_ge(&mut self, a: HostVal, b: HostVal) -> i32;
421
422    // --- String operations ---
423    fn str_concat(&mut self, a: HostVal, b: HostVal) -> HostVal;
424    fn str_replace(&mut self, s: HostVal, old: HostVal, new: HostVal, count: HostVal) -> HostVal;
425    fn val_to_string(&mut self, val: HostVal) -> HostVal;
426
427    // --- Compound operations ---
428    /// Begin building a compound value. kind: 0=List, 1=Pair, 2=Map, 3=Struct.
429    fn compound_begin(&mut self, kind: i32);
430    fn compound_push(&mut self, val: HostVal);
431    fn compound_end(&mut self) -> HostVal;
432    /// Get element by index (list) or value by key (map/struct).
433    fn compound_get(&mut self, compound: HostVal, key: HostVal) -> HostVal;
434    /// Get length/size of compound, returned as a Number HostVal.
435    fn compound_len(&mut self, compound: HostVal) -> HostVal;
436    fn pair_first(&mut self, compound: HostVal) -> HostVal;
437    fn pair_second(&mut self, compound: HostVal) -> HostVal;
438
439    // --- Debug ---
440    fn debuglog(&mut self, val: HostVal);
441
442    // --- HashJoin ---
443    //
444    // Protocol:
445    //   1. `hash_join_begin(id)` — initialize / reset the table for `id`.
446    //   2. For each build row: N calls to `hash_join_push(val)` followed by
447    //      `hash_join_commit_build(id, n_keys)`. The first `n_keys` of the
448    //      pushed values form the join key; the rest are the payload.
449    //   3. For each probe tuple: K calls to `hash_join_push(val)` with the
450    //      key values, followed by `hash_join_probe(id)`, which returns an
451    //      iter_id usable with the existing `scan_next` / `get_col`.
452    //   4. `hash_join_end(id)` — drop the table.
453    //
454    // Default impls panic so existing Host implementations compile until
455    // they opt into HashJoin support.
456    fn hash_join_begin(&mut self, _join_id: i32) {
457        unimplemented!("hash_join_begin");
458    }
459    fn hash_join_push(&mut self, _val: HostVal) {
460        unimplemented!("hash_join_push");
461    }
462    fn hash_join_commit_build(&mut self, _join_id: i32, _n_keys: i32) {
463        unimplemented!("hash_join_commit_build");
464    }
465    fn hash_join_probe(&mut self, _join_id: i32) -> i32 {
466        unimplemented!("hash_join_probe");
467    }
468    fn hash_join_end(&mut self, _join_id: i32) {
469        unimplemented!("hash_join_end");
470    }
471}
472
473// --- Legacy Interfaces ---
474
475pub trait Receiver<'a> {
476    fn next(&self, item: &'a ast::Atom<'a>) -> Result<()>;
477}
478
479impl<'a, Closure: Fn(&'a ast::Atom<'a>) -> Result<()>> Receiver<'a> for Closure {
480    fn next(&self, item: &'a ast::Atom<'a>) -> Result<()> {
481        (*self)(item)
482    }
483}
484
485/// Lifetime 'a is used for data held by this store.
486pub trait ReadOnlyFactStore<'a> {
487    fn arena(&'a self) -> &'a Arena;
488
489    fn contains<'src>(&'a self, src: &'src Arena, fact: &'src ast::Atom<'src>) -> Result<bool>;
490
491    // Sends atoms that matches query `Atom{ sym: query_sym, args: query_args}`.
492    // pub sym: PredicateIndex,
493    fn get<'query, R: Receiver<'a>>(
494        &'a self,
495        query_sym: ast::PredicateIndex,
496        query_args: &'query [&'query ast::BaseTerm<'query>],
497        cb: &R,
498    ) -> Result<()>;
499
500    // Invokes cb for every predicate available in this store.
501    // It would be nice to use `impl Iterator` here.
502    fn predicates(&'a self) -> Vec<ast::PredicateIndex>;
503
504    // Returns approximae number of facts.
505    fn estimate_fact_count(&self) -> u32;
506}
507
508/// A fact store that can be mutated.
509/// Implementations must make use of interior mutability.
510pub trait FactStore<'a>: ReadOnlyFactStore<'a> {
511    /// Returns true if fact did not exist before.
512    /// The fact is copied.
513    fn add<'src>(&'a self, src: &'src Arena, fact: &'src ast::Atom<'src>) -> Result<bool>;
514
515    /// Adds all facts from given store.
516    fn merge<'src, S>(&'a self, src: &'src Arena, store: &'src S)
517    where
518        S: ReadOnlyFactStore<'src>;
519}
520
521/// Invokes cb for every fact in the store.
522pub fn get_all_facts<'a, S, R: Receiver<'a>>(store: &'a S, cb: &R) -> Result<()>
523where
524    S: ReadOnlyFactStore<'a> + 'a,
525{
526    let arena = Arena::new_with_global_interner();
527    let preds = store.predicates();
528
529    for pred in preds {
530        arena.copy_predicate_sym(store.arena(), pred);
531        store.get(pred, arena.new_query(pred).args, cb)?;
532    }
533    Ok(())
534}