Skip to main content

mangle_common/
lib.rs

1// Copyright 2024 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! # Mangle FactStore
16//!
17//! Defines core storage interfaces (`Store`, `Host`) and a legacy in-memory storage implementation.
18
19use anyhow::{Result, anyhow};
20use ast::Arena;
21use mangle_ast as ast;
22
23mod tablestore;
24pub use tablestore::{TableConfig, TableStoreImpl, TableStoreSchema};
25
26// --- New Interfaces (Moved from interpreter/vm to break cycles) ---
27
28/// Identifies the kind of a compound value. When type information is available
29/// (concrete types), the kind is redundant. For `/any` or union-typed columns
30/// the kind tag makes the value self-describing.
31#[cfg(feature = "edge")]
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
33pub enum CompoundKind {
34    List,
35    Pair,
36    Map,
37    Struct,
38}
39
40#[cfg(feature = "edge")]
41#[derive(Debug, Clone)]
42pub enum Value {
43    Number(i64),
44    Float(f64),
45    String(String),
46    /// Time as nanoseconds since Unix epoch (consistent with Go implementation).
47    Time(i64),
48    /// Duration as nanoseconds (consistent with Go implementation).
49    Duration(i64),
50    /// A compound value: a flat sequence of values. The `CompoundKind` tag
51    /// identifies the interpretation. For lists and pairs, elements are stored
52    /// directly. For structs and maps, keys/field-names and values are
53    /// interleaved: [k1, v1, k2, v2, ...].
54    Compound(CompoundKind, Vec<Value>),
55    Null, // Used for iteration end or missing
56}
57
58#[cfg(feature = "edge")]
59impl PartialEq for Value {
60    fn eq(&self, other: &Self) -> bool {
61        match (self, other) {
62            (Value::Number(a), Value::Number(b)) => a == b,
63            (Value::Float(a), Value::Float(b)) => a.to_bits() == b.to_bits(),
64            (Value::String(a), Value::String(b)) => a == b,
65            (Value::Time(a), Value::Time(b)) => a == b,
66            (Value::Duration(a), Value::Duration(b)) => a == b,
67            (Value::Compound(ka, a), Value::Compound(kb, b)) => ka == kb && a == b,
68            (Value::Null, Value::Null) => true,
69            _ => false,
70        }
71    }
72}
73
74#[cfg(feature = "edge")]
75impl Eq for Value {}
76
77#[cfg(feature = "edge")]
78impl std::hash::Hash for Value {
79    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
80        std::mem::discriminant(self).hash(state);
81        match self {
82            Value::Number(n) => n.hash(state),
83            Value::Float(f) => f.to_bits().hash(state),
84            Value::String(s) => s.hash(state),
85            Value::Time(t) => t.hash(state),
86            Value::Duration(d) => d.hash(state),
87            Value::Compound(k, v) => {
88                k.hash(state);
89                v.hash(state);
90            }
91            Value::Null => {}
92        }
93    }
94}
95
96#[cfg(feature = "edge")]
97impl PartialOrd for Value {
98    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
99        Some(self.cmp(other))
100    }
101}
102
103#[cfg(feature = "edge")]
104impl Ord for Value {
105    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
106        match (self, other) {
107            (Value::Number(a), Value::Number(b)) => a.cmp(b),
108            (Value::Float(a), Value::Float(b)) => a.total_cmp(b),
109            // Cross-numeric: promote integer to float for comparison
110            (Value::Number(a), Value::Float(b)) => (*a as f64).total_cmp(b),
111            (Value::Float(a), Value::Number(b)) => a.total_cmp(&(*b as f64)),
112            (Value::String(a), Value::String(b)) => a.cmp(b),
113            (Value::Time(a), Value::Time(b)) => a.cmp(b),
114            (Value::Duration(a), Value::Duration(b)) => a.cmp(b),
115            // Cross-type: Duration vs Number compare as i64 nanoseconds
116            (Value::Duration(a), Value::Number(b)) => a.cmp(b),
117            (Value::Number(a), Value::Duration(b)) => a.cmp(b),
118            // Cross-type: Time vs Time already handled; Time vs Number
119            (Value::Time(a), Value::Number(b)) => a.cmp(b),
120            (Value::Number(a), Value::Time(b)) => a.cmp(b),
121            (Value::Compound(ka, a), Value::Compound(kb, b)) => ka.cmp(kb).then_with(|| a.cmp(b)),
122            (Value::Null, Value::Null) => std::cmp::Ordering::Equal,
123            // Cross-variant ordering: Number/Float < String < Time < Duration < Compound < Null
124            (Value::Number(_) | Value::Float(_), _) => std::cmp::Ordering::Less,
125            (_, Value::Number(_) | Value::Float(_)) => std::cmp::Ordering::Greater,
126            (Value::String(_), _) => std::cmp::Ordering::Less,
127            (_, Value::String(_)) => std::cmp::Ordering::Greater,
128            (Value::Time(_), _) => std::cmp::Ordering::Less,
129            (_, Value::Time(_)) => std::cmp::Ordering::Greater,
130            (Value::Duration(_), _) => std::cmp::Ordering::Less,
131            (_, Value::Duration(_)) => std::cmp::Ordering::Greater,
132            (Value::Compound(..), _) => std::cmp::Ordering::Less,
133            (_, Value::Compound(..)) => std::cmp::Ordering::Greater,
134        }
135    }
136}
137
138#[cfg(feature = "edge")]
139impl std::fmt::Display for Value {
140    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
141        match self {
142            Value::Number(n) => write!(f, "{n}"),
143            Value::Float(v) => write!(f, "{v}"),
144            Value::String(s) => write!(f, "{s:?}"),
145            Value::Time(nanos) => write!(f, "{}", format_time_nanos(*nanos)),
146            Value::Duration(nanos) => write!(f, "{}", format_duration_nanos(*nanos)),
147            Value::Compound(kind, elems) => match kind {
148                CompoundKind::List | CompoundKind::Pair => {
149                    write!(f, "[")?;
150                    for (i, e) in elems.iter().enumerate() {
151                        if i > 0 {
152                            write!(f, ", ")?;
153                        }
154                        write!(f, "{e}")?;
155                    }
156                    write!(f, "]")
157                }
158                CompoundKind::Map => {
159                    write!(f, "[")?;
160                    for (i, pair) in elems.chunks_exact(2).enumerate() {
161                        if i > 0 {
162                            write!(f, ", ")?;
163                        }
164                        write!(f, "{}: {}", pair[0], pair[1])?;
165                    }
166                    write!(f, "]")
167                }
168                CompoundKind::Struct => {
169                    write!(f, "{{")?;
170                    for (i, pair) in elems.chunks_exact(2).enumerate() {
171                        if i > 0 {
172                            write!(f, ", ")?;
173                        }
174                        write!(f, "{}: {}", pair[0], pair[1])?;
175                    }
176                    write!(f, "}}")
177                }
178            },
179            Value::Null => write!(f, "null"),
180        }
181    }
182}
183
184#[cfg(feature = "edge")]
185fn format_time_nanos(nanos: i64) -> String {
186    let secs = nanos.div_euclid(1_000_000_000);
187    let ns = nanos.rem_euclid(1_000_000_000) as u32;
188
189    // Convert seconds since epoch to date/time components
190    // Using a simplified algorithm (valid for dates from 1970 onwards)
191    let days = secs.div_euclid(86400);
192    let time_of_day = secs.rem_euclid(86400);
193    let hour = time_of_day / 3600;
194    let minute = (time_of_day % 3600) / 60;
195    let second = time_of_day % 60;
196
197    // Civil date from days since epoch (algorithm from Howard Hinnant)
198    let z = days + 719468;
199    let era = (if z >= 0 { z } else { z - 146096 }) / 146097;
200    let doe = (z - era * 146097) as u32;
201    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;
202    let y = yoe as i64 + era * 400;
203    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
204    let mp = (5 * doy + 2) / 153;
205    let d = doy - (153 * mp + 2) / 5 + 1;
206    let m = if mp < 10 { mp + 3 } else { mp - 9 };
207    let y = if m <= 2 { y + 1 } else { y };
208
209    if ns == 0 {
210        format!("{y:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}Z")
211    } else {
212        // Trim trailing zeros from fractional seconds
213        let mut frac = format!("{ns:09}");
214        frac = frac.trim_end_matches('0').to_string();
215        format!("{y:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}.{frac}Z")
216    }
217}
218
219/// Formats a duration in nanoseconds to match Go's time.Duration.String() output.
220/// Produces compound forms like "1h30m0s", "2.5s", "500ms", "150ns".
221#[cfg(feature = "edge")]
222fn format_duration_nanos(nanos: i64) -> String {
223    if nanos == 0 {
224        return "0s".to_string();
225    }
226
227    let mut result = String::new();
228    let mut remaining = if nanos < 0 {
229        result.push('-');
230        nanos.unsigned_abs()
231    } else {
232        nanos as u64
233    };
234
235    const NANOS_PER_NS: u64 = 1;
236    const NANOS_PER_US: u64 = 1_000;
237    const NANOS_PER_MS: u64 = 1_000_000;
238    const NANOS_PER_S: u64 = 1_000_000_000;
239    const NANOS_PER_M: u64 = 60 * NANOS_PER_S;
240    const NANOS_PER_H: u64 = 60 * NANOS_PER_M;
241
242    // For durations >= 1s, Go uses compound h/m/s format
243    if remaining >= NANOS_PER_S {
244        let hours = remaining / NANOS_PER_H;
245        remaining %= NANOS_PER_H;
246        let minutes = remaining / NANOS_PER_M;
247        remaining %= NANOS_PER_M;
248        let seconds = remaining / NANOS_PER_S;
249        let sub_second_nanos = remaining % NANOS_PER_S;
250
251        if hours > 0 {
252            result.push_str(&format!("{hours}h"));
253        }
254        if minutes > 0 || hours > 0 {
255            result.push_str(&format!("{minutes}m"));
256        }
257        if sub_second_nanos == 0 {
258            result.push_str(&format!("{seconds}s"));
259        } else {
260            // Format fractional seconds, trimming trailing zeros
261            let frac = format!("{sub_second_nanos:09}");
262            let frac = frac.trim_end_matches('0');
263            result.push_str(&format!("{seconds}.{frac}s"));
264        }
265    } else if remaining >= NANOS_PER_MS {
266        // Milliseconds with optional fractional part
267        let ms = remaining / NANOS_PER_MS;
268        let sub = remaining % NANOS_PER_MS;
269        if sub == 0 {
270            result.push_str(&format!("{ms}ms"));
271        } else {
272            let frac = format!("{sub:06}");
273            let frac = frac.trim_end_matches('0');
274            result.push_str(&format!("{ms}.{frac}ms"));
275        }
276    } else if remaining >= NANOS_PER_US {
277        // Microseconds
278        let us = remaining / NANOS_PER_US;
279        let sub = remaining % NANOS_PER_US;
280        if sub == 0 {
281            result.push_str(&format!("{us}µs"));
282        } else {
283            let frac = format!("{sub:03}");
284            let frac = frac.trim_end_matches('0');
285            result.push_str(&format!("{us}.{frac}µs"));
286        }
287    } else {
288        // Nanoseconds
289        result.push_str(&format!("{}ns", remaining));
290    }
291
292    result
293}
294
295/// Abstract interface for relation storage (Edge Mode).
296///
297/// Tuples are currently stored as `Vec<Value>` where compound values
298/// (lists, structs, maps) appear as `Value::Compound(...)`.
299///
300/// TODO: Support explicit table flattening. When a user annotates a type
301/// declaration, compound columns should be inlined into the tuple as
302/// length-prefixed sequences of scalar values. This flattening should
303/// only apply to explicitly requested levels of the type tree (no
304/// automatic recursive flattening). The Store would then see wider
305/// tuples of scalar values instead of Compound entries.
306#[cfg(feature = "edge")]
307pub trait Store {
308    /// Returns an iterator over all tuples in the relation.
309    /// Returns an error if the relation does not exist.
310    fn scan(&self, relation: &str) -> Result<Box<dyn Iterator<Item = Vec<Value>> + '_>>;
311
312    /// Returns an iterator over only the new tuples added in the last iteration.
313    fn scan_delta(&self, relation: &str) -> Result<Box<dyn Iterator<Item = Vec<Value>> + '_>>;
314
315    /// Returns an iterator over tuples being collected for the next iteration.
316    fn scan_next_delta(&self, relation: &str) -> Result<Box<dyn Iterator<Item = Vec<Value>> + '_>>;
317
318    /// Returns an iterator over tuples in the relation matching a key in a column.
319    fn scan_index(
320        &self,
321        relation: &str,
322        col_idx: usize,
323        key: &Value,
324    ) -> Result<Box<dyn Iterator<Item = Vec<Value>> + '_>>;
325
326    /// Returns an iterator over delta tuples matching a key in a column.
327    fn scan_delta_index(
328        &self,
329        relation: &str,
330        col_idx: usize,
331        key: &Value,
332    ) -> Result<Box<dyn Iterator<Item = Vec<Value>> + '_>>;
333
334    /// Inserts a tuple into the relation (specifically into the delta/new set).
335    /// Returns true if it was new.
336    fn insert(&mut self, relation: &str, tuple: Vec<Value>) -> Result<bool>;
337
338    /// Merges current deltas into the stable set of facts.
339    fn merge_deltas(&mut self);
340
341    /// Ensures a relation exists in the store.
342    fn create_relation(&mut self, relation: &str);
343
344    /// Removes a specific tuple from the relation's stable set.
345    /// Returns true if the tuple was found and removed.
346    fn retract(&mut self, relation: &str, tuple: &[Value]) -> Result<bool>;
347
348    /// Removes all tuples from a relation (stable, delta, and next_delta).
349    fn clear(&mut self, relation: &str);
350
351    /// Returns the names of all relations in the store.
352    fn relation_names(&self) -> Vec<String>;
353
354    /// Coalesce temporal intervals for a relation.
355    /// Groups facts by their non-temporal columns (all except the last 2),
356    /// sorts intervals by start time, and merges overlapping/adjacent intervals.
357    /// Default implementation is a no-op.
358    fn coalesce_temporal(&mut self, _relation: &str) {}
359}
360
361/// Opaque handle to a value in the host's value store.
362/// In WASM, these are represented as `externref`.
363#[cfg(feature = "server")]
364#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
365pub struct HostVal(pub u32);
366
367/// Trait for the host environment that provides storage and data access (Server Mode).
368///
369/// All Mangle values (numbers, floats, strings, compounds) are represented as
370/// opaque `HostVal` handles. In WASM, these map to `externref` — the WASM module
371/// never inspects values directly; all operations go through host calls.
372#[cfg(feature = "server")]
373pub trait Host {
374    // --- Iterator/scan (control flow) ---
375    fn scan_start(&mut self, rel_id: i32) -> i32;
376    fn scan_delta_start(&mut self, rel_id: i32) -> i32;
377    fn scan_next(&mut self, iter_id: i32) -> i32;
378    /// Merges deltas and returns 1 if changes occurred, 0 otherwise.
379    fn merge_deltas(&mut self) -> i32;
380    fn scan_aggregate_start(&mut self, rel_id: i32, description: Vec<i32>) -> i32;
381    fn scan_index_start(&mut self, rel_id: i32, col_idx: i32, val: HostVal) -> i32;
382
383    // --- Value access ---
384    fn get_col(&mut self, tuple_ptr: i32, col_idx: i32) -> HostVal;
385
386    // --- Multi-column insertion ---
387    fn insert_begin(&mut self, rel_id: i32);
388    fn insert_push(&mut self, val: HostVal);
389    fn insert_end(&mut self);
390
391    // --- Constants ---
392    fn const_number(&mut self, n: i64) -> HostVal;
393    fn const_float(&mut self, bits: i64) -> HostVal;
394    fn const_string(&mut self, id: i32) -> HostVal;
395    fn const_name(&mut self, id: i32) -> HostVal;
396    fn const_time(&mut self, nanos: i64) -> HostVal;
397    fn const_duration(&mut self, nanos: i64) -> HostVal;
398
399    // --- Arithmetic (handles int/float promotion internally) ---
400    fn val_add(&mut self, a: HostVal, b: HostVal) -> HostVal;
401    fn val_sub(&mut self, a: HostVal, b: HostVal) -> HostVal;
402    fn val_mul(&mut self, a: HostVal, b: HostVal) -> HostVal;
403    fn val_div(&mut self, a: HostVal, b: HostVal) -> HostVal;
404    fn val_sqrt(&mut self, a: HostVal) -> HostVal;
405
406    // --- Comparisons (return 0 or 1) ---
407    fn val_eq(&mut self, a: HostVal, b: HostVal) -> i32;
408    fn val_neq(&mut self, a: HostVal, b: HostVal) -> i32;
409    fn val_lt(&mut self, a: HostVal, b: HostVal) -> i32;
410    fn val_le(&mut self, a: HostVal, b: HostVal) -> i32;
411    fn val_gt(&mut self, a: HostVal, b: HostVal) -> i32;
412    fn val_ge(&mut self, a: HostVal, b: HostVal) -> i32;
413
414    // --- String operations ---
415    fn str_concat(&mut self, a: HostVal, b: HostVal) -> HostVal;
416    fn str_replace(&mut self, s: HostVal, old: HostVal, new: HostVal, count: HostVal) -> HostVal;
417    fn val_to_string(&mut self, val: HostVal) -> HostVal;
418
419    // --- Compound operations ---
420    /// Begin building a compound value. kind: 0=List, 1=Pair, 2=Map, 3=Struct.
421    fn compound_begin(&mut self, kind: i32);
422    fn compound_push(&mut self, val: HostVal);
423    fn compound_end(&mut self) -> HostVal;
424    /// Get element by index (list) or value by key (map/struct).
425    fn compound_get(&mut self, compound: HostVal, key: HostVal) -> HostVal;
426    /// Get length/size of compound, returned as a Number HostVal.
427    fn compound_len(&mut self, compound: HostVal) -> HostVal;
428    fn pair_first(&mut self, compound: HostVal) -> HostVal;
429    fn pair_second(&mut self, compound: HostVal) -> HostVal;
430
431    // --- Debug ---
432    fn debuglog(&mut self, val: HostVal);
433}
434
435// --- Legacy Interfaces ---
436
437pub trait Receiver<'a> {
438    fn next(&self, item: &'a ast::Atom<'a>) -> Result<()>;
439}
440
441impl<'a, Closure: Fn(&'a ast::Atom<'a>) -> Result<()>> Receiver<'a> for Closure {
442    fn next(&self, item: &'a ast::Atom<'a>) -> Result<()> {
443        (*self)(item)
444    }
445}
446
447/// Lifetime 'a is used for data held by this store.
448pub trait ReadOnlyFactStore<'a> {
449    fn arena(&'a self) -> &'a Arena;
450
451    fn contains<'src>(&'a self, src: &'src Arena, fact: &'src ast::Atom<'src>) -> Result<bool>;
452
453    // Sends atoms that matches query `Atom{ sym: query_sym, args: query_args}`.
454    // pub sym: PredicateIndex,
455    fn get<'query, R: Receiver<'a>>(
456        &'a self,
457        query_sym: ast::PredicateIndex,
458        query_args: &'query [&'query ast::BaseTerm<'query>],
459        cb: &R,
460    ) -> Result<()>;
461
462    // Invokes cb for every predicate available in this store.
463    // It would be nice to use `impl Iterator` here.
464    fn predicates(&'a self) -> Vec<ast::PredicateIndex>;
465
466    // Returns approximae number of facts.
467    fn estimate_fact_count(&self) -> u32;
468}
469
470/// A fact store that can be mutated.
471/// Implementations must make use of interior mutability.
472pub trait FactStore<'a>: ReadOnlyFactStore<'a> {
473    /// Returns true if fact did not exist before.
474    /// The fact is copied.
475    fn add<'src>(&'a self, src: &'src Arena, fact: &'src ast::Atom<'src>) -> Result<bool>;
476
477    /// Adds all facts from given store.
478    fn merge<'src, S>(&'a self, src: &'src Arena, store: &'src S)
479    where
480        S: ReadOnlyFactStore<'src>;
481}
482
483/// Invokes cb for every fact in the store.
484pub fn get_all_facts<'a, S, R: Receiver<'a>>(store: &'a S, cb: &R) -> Result<()>
485where
486    S: ReadOnlyFactStore<'a> + 'a,
487{
488    let arena = Arena::new_with_global_interner();
489    let preds = store.predicates();
490
491    for pred in preds {
492        arena.copy_predicate_sym(store.arena(), pred);
493        store.get(pred, arena.new_query(pred).args, cb)?;
494    }
495    Ok(())
496}