use crate::node::{BinOp, Node, NodeHash, Param, Produces};
use crate::ty::{Effect, Type};
use rusqlite::{params, Connection, OptionalExtension};
use std::collections::BTreeSet;
use std::fmt;
use std::path::Path;
/// Everything that can go wrong while opening or using a [`Store`].
#[derive(Debug)]
pub enum Error {
/// A failure reported by rusqlite (converted via `From<rusqlite::Error>`).
Sqlite(rusqlite::Error),
/// A stored node body could not be deserialized by serde_json.
Decode(serde_json::Error),
/// The ref name is already taken; carries the offending name.
/// Returned by `Store::checkpoint` for any existing ref and by
/// `Store::branch` when the existing ref is a checkpoint.
NameInUse(String),
/// `Store::materialize` needed a node whose hash is not in the store.
MissingNode(NodeHash),
/// The store on disk is not in the format this build supports.
/// `found` is the recorded version string, or `"unversioned"` when the
/// store holds nodes but records no version; `expected` is `FORMAT_VERSION`.
FormatMismatch { found: String, expected: u32 },
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Error::Sqlite(e) => write!(f, "sqlite error: {e}"),
Error::Decode(e) => write!(f, "node decode error: {e}"),
Error::NameInUse(n) => write!(f, "ref name already in use: {n}"),
Error::MissingNode(h) => write!(f, "missing node: {h}"),
Error::FormatMismatch { found, expected } => write!(
f,
"store format version {found} is not the supported {expected} \
(pre-1.0 the AST format is not stable; this store must be \
rebuilt with the current version)"
),
}
}
}
// Empty impl: relies on the default `std::error::Error` methods, with the
// derived `Debug` and the hand-written `Display` supplying the required
// supertraits.
impl std::error::Error for Error {}
impl From<rusqlite::Error> for Error {
fn from(e: rusqlite::Error) -> Self {
Error::Sqlite(e)
}
}
impl From<serde_json::Error> for Error {
fn from(e: serde_json::Error) -> Self {
Error::Decode(e)
}
}
/// Shorthand for a `std::result::Result` whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// A fully expanded tree, as returned by `Store::materialize`.
///
/// `Store::materialize` maps each `Node` variant to the variant of the same
/// name here: wherever the stored node refers to a child by hash, this type
/// holds the child's own materialized subtree instead. Payloads that are not
/// child references (literals, names, types, parameter lists, ...) are
/// carried over unchanged.
///
/// The per-variant notes below describe structure only; what each operation
/// means is defined by whatever consumes the tree, which is not part of this
/// file.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Materialized {
// --- numeric literals, arithmetic and conversions ---
Lit(i64),
// NOTE(review): the float literal is carried as a `u64` — presumably the
// IEEE-754 bit pattern, which would keep `Eq` derivable; confirm against `Node`.
FloatLit(u64),
FloatOp {
op: BinOp,
lhs: Box<Materialized>,
rhs: Box<Materialized>,
},
IntToFloat(Box<Materialized>),
FloatToInt(Box<Materialized>),
// NOTE(review): the scale/representation of the `i64` decimal payload is not
// visible in this file — confirm against `Node`.
DecimalLit(i64),
DecimalOp {
op: BinOp,
lhs: Box<Materialized>,
rhs: Box<Materialized>,
},
IntToDecimal(Box<Materialized>),
DecimalToInt(Box<Materialized>),
DecimalRaw(Box<Materialized>),
// --- booleans ---
Bool(bool),
Not(Box<Materialized>),
// --- strings ---
Str(String),
StrLen(Box<Materialized>),
StrLower(Box<Materialized>),
StrFromCode(Box<Materialized>),
StrConcat(Box<Materialized>, Box<Materialized>),
StrSlice {
s: Box<Materialized>,
start: Box<Materialized>,
len: Box<Materialized>,
},
StrEq(Box<Materialized>, Box<Materialized>),
StrContains {
haystack: Box<Materialized>,
needle: Box<Materialized>,
},
StrStartsWith {
s: Box<Materialized>,
prefix: Box<Materialized>,
},
StrIndexOf {
haystack: Box<Materialized>,
needle: Box<Materialized>,
},
NumberToStr(Box<Materialized>),
StrToNumber(Box<Materialized>),
StrToNumberOpt(Box<Materialized>),
Now,
// --- lists and options ---
/// A list given by its element subtrees, in stored order.
List(Vec<Materialized>),
ListEmpty {
elem: Type,
},
ListCons {
head: Box<Materialized>,
tail: Box<Materialized>,
},
OptionSome(Box<Materialized>),
OptionNone {
elem: Type,
},
OptionElse {
opt: Box<Materialized>,
default: Box<Materialized>,
},
OptionMatch {
opt: Box<Materialized>,
some_bind: String,
some_body: Box<Materialized>,
none_body: Box<Materialized>,
},
ListTryGet {
list: Box<Materialized>,
index: Box<Materialized>,
},
ListLen(Box<Materialized>),
ListGet {
list: Box<Materialized>,
index: Box<Materialized>,
},
// --- maps ---
/// A map given as `(key, value)` subtree pairs, in stored order.
Map(Vec<(Materialized, Materialized)>),
MapGet {
map: Box<Materialized>,
key: Box<Materialized>,
},
MapTryGet {
map: Box<Materialized>,
key: Box<Materialized>,
},
MapLen(Box<Materialized>),
// --- operations named after external interactions ---
// NOTE(review): these presumably correspond to entries of `Effect`; confirm.
Log(Box<Materialized>),
Publish(Box<Materialized>),
SetHeader {
name: Box<Materialized>,
value: Box<Materialized>,
},
Rand,
MutNew(Box<Materialized>),
MutGet(Box<Materialized>),
MutSet {
cell: Box<Materialized>,
value: Box<Materialized>,
},
DiskWrite {
path: Box<Materialized>,
content: Box<Materialized>,
},
DiskRead(Box<Materialized>),
NetGet(Box<Materialized>),
DbQuery {
sql: Box<Materialized>,
params: Box<Materialized>,
},
// --- names, calls and control flow ---
/// A reference by name; the name is stored inline, not as a child node.
Ref(String),
/// A call to a function identified by name, with materialized arguments.
Call {
func: String,
args: Vec<Materialized>,
},
FuncRef(String),
/// A call whose callee is itself a subtree rather than a name.
CallValue {
callee: Box<Materialized>,
args: Vec<Materialized>,
},
Lambda {
params: Vec<Param>,
body: Box<Materialized>,
},
Hole {
expects: String,
},
/// Associates the name `binding` with the subtree `value`.
Step {
binding: String,
value: Box<Materialized>,
},
BinOp {
op: BinOp,
lhs: Box<Materialized>,
rhs: Box<Materialized>,
},
If {
cond: Box<Materialized>,
then_branch: Box<Materialized>,
else_branch: Box<Materialized>,
},
Fail(String),
/// `handlers` pairs a label with its recovery subtree
/// (`Store::materialize` calls the two parts `variant` and `recover`).
Handle {
body: Box<Materialized>,
handlers: Vec<(String, Materialized)>,
},
// --- definitions ---
/// A function definition: signature metadata copied from the node, plus the
/// materialized `body` entries and the `result` subtree.
Function {
name: String,
type_params: Vec<String>,
params: Vec<Param>,
produces: Produces,
requires: BTreeSet<Effect>,
on_failure: Vec<String>,
body: Vec<Materialized>,
result: Box<Materialized>,
},
/// A module: its name plus materialized `types` and `functions` children.
Module {
name: String,
types: Vec<Materialized>,
functions: Vec<Materialized>,
},
/// A record type definition; has no child nodes, so it is copied verbatim.
RecordDef {
name: String,
fields: Vec<(String, Type)>,
},
/// A record value: `(field name, value subtree)` pairs.
Record {
type_name: String,
fields: Vec<(String, Materialized)>,
},
Field {
base: Box<Materialized>,
type_name: String,
field: String,
},
/// A variant type definition; has no child nodes, so it is copied verbatim.
// NOTE(review): each `cases` entry looks like `(case name, [(field name, type)])`
// — confirm against `Node::VariantDef`.
VariantDef {
name: String,
cases: Vec<(String, Vec<(String, Type)>)>,
},
/// A variant value: the chosen `case` plus `(field name, value subtree)` pairs.
Variant {
type_name: String,
case: String,
fields: Vec<(String, Materialized)>,
},
/// Each arm is `(case, bindings, body)`, taken from the stored arm's
/// `case`, `bindings` and `body` fields.
Match {
scrutinee: Box<Materialized>,
type_name: String,
arms: Vec<(String, Vec<String>, Materialized)>,
},
}
/// Store format version supported by this build. It is written to the `meta`
/// table when a fresh store is initialised, and an existing store is refused
/// with `Error::FormatMismatch` unless its recorded value matches.
pub const FORMAT_VERSION: u32 = 1;
/// A SQLite-backed store holding nodes keyed by their hash, named refs
/// (branches and checkpoints) pointing at node hashes, and a small `meta`
/// table recording the format version.
pub struct Store {
// The connection all queries run on; set up by `Store::init`.
conn: Connection,
}
impl Store {
/// Opens the store kept in the SQLite database file at `path`, then runs the
/// schema setup and format-version check performed by `init`.
///
/// # Errors
/// `Error::Sqlite` if the database cannot be opened or queried, and
/// `Error::FormatMismatch` if it is in an unsupported format.
pub fn open(path: impl AsRef<Path>) -> Result<Self> {
    let conn = Connection::open(path)?;
    Self::init(conn)
}
/// Creates a store backed by an in-memory SQLite database, initialised the
/// same way as one opened from a file.
///
/// # Errors
/// `Error::Sqlite` if the connection or the schema setup fails.
pub fn open_in_memory() -> Result<Self> {
    let conn = Connection::open_in_memory()?;
    Self::init(conn)
}
/// Prepares `conn` for use as a store and wraps it.
///
/// Creates the `nodes`, `refs` and `meta` tables if they do not exist, then
/// checks the recorded `format_version`:
/// * equal to `FORMAT_VERSION` — the store is accepted;
/// * any other value — `Error::FormatMismatch` carrying that value;
/// * absent while `nodes` already has rows — `Error::FormatMismatch` with
///   `found` set to `"unversioned"`;
/// * absent and `nodes` is empty — the current version is recorded.
///
/// # Errors
/// `Error::Sqlite` for any database failure, `Error::FormatMismatch` as above.
fn init(conn: Connection) -> Result<Self> {
    // The SQL text is deliberately left unindented so the statement bytes
    // stay exactly as they were.
    conn.execute_batch(
        "CREATE TABLE IF NOT EXISTS nodes (
hash TEXT PRIMARY KEY,
body BLOB NOT NULL
);
CREATE TABLE IF NOT EXISTS refs (
name TEXT PRIMARY KEY,
root TEXT NOT NULL,
kind TEXT NOT NULL CHECK (kind IN ('branch','checkpoint'))
);
CREATE TABLE IF NOT EXISTS meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);",
    )?;

    let current = FORMAT_VERSION.to_string();
    let stored = conn
        .query_row(
            "SELECT value FROM meta WHERE key = 'format_version'",
            [],
            |row| row.get::<_, String>(0),
        )
        .optional()?;

    if let Some(found) = stored {
        // A version is recorded: it must match exactly (string comparison).
        if found != current {
            return Err(Error::FormatMismatch {
                found,
                expected: FORMAT_VERSION,
            });
        }
        return Ok(Self { conn });
    }

    // No version recorded. That is only acceptable for a store without nodes;
    // existing nodes of unknown format must not be adopted.
    let any_node: i64 = conn.query_row(
        "SELECT EXISTS(SELECT 1 FROM nodes LIMIT 1)",
        [],
        |row| row.get(0),
    )?;
    if any_node != 0 {
        return Err(Error::FormatMismatch {
            found: "unversioned".into(),
            expected: FORMAT_VERSION,
        });
    }

    conn.execute(
        "INSERT INTO meta (key, value) VALUES ('format_version', ?1)",
        params![current],
    )?;
    Ok(Self { conn })
}
/// Stores `node` under its own hash and returns that hash.
///
/// Uses `INSERT OR IGNORE`, so putting a node whose hash is already present
/// leaves the existing row untouched and still returns the hash.
///
/// # Errors
/// `Error::Sqlite` if the insert fails.
pub fn put(&self, node: &Node) -> Result<NodeHash> {
    let key = node.hash();
    let body = node.canonical_bytes();
    self.conn.execute(
        "INSERT OR IGNORE INTO nodes (hash, body) VALUES (?1, ?2)",
        params![key.as_str(), body],
    )?;
    Ok(key)
}
pub fn get(&self, hash: &NodeHash) -> Result<Option<Node>> {
let body: Option<Vec<u8>> = self
.conn
.query_row(
"SELECT body FROM nodes WHERE hash = ?1",
params![hash.as_str()],
|row| row.get(0),
)
.optional()?;
match body {
Some(bytes) => Ok(Some(serde_json::from_slice(&bytes)?)),
None => Ok(None),
}
}
/// Returns the number of rows in the `nodes` table.
///
/// # Errors
/// `Error::Sqlite` if the query fails.
pub fn node_count(&self) -> Result<i64> {
    let total: i64 = self
        .conn
        .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
    Ok(total)
}
pub fn checkpoint(&self, name: &str, root: &NodeHash) -> Result<()> {
if self.ref_kind(name)?.is_some() {
return Err(Error::NameInUse(name.to_string()));
}
self.conn.execute(
"INSERT INTO refs (name, root, kind) VALUES (?1, ?2, 'checkpoint')",
params![name, root.as_str()],
)?;
Ok(())
}
/// Creates the branch ref `name` pointing at `root`, or moves it there if the
/// branch already exists.
///
/// A name that currently belongs to a checkpoint is never overwritten.
///
/// # Errors
/// `Error::NameInUse` if `name` is a checkpoint, `Error::Sqlite` on database
/// failure.
pub fn branch(&self, name: &str, root: &NodeHash) -> Result<()> {
    let existing = self.ref_kind(name)?;
    if matches!(existing.as_deref(), Some("checkpoint")) {
        return Err(Error::NameInUse(name.to_string()));
    }
    // Upsert: insert a new branch, or repoint the existing row's root.
    // The second line of the SQL text is deliberately left unindented so the
    // statement bytes stay exactly as they were.
    let upsert = "INSERT INTO refs (name, root, kind) VALUES (?1, ?2, 'branch')
ON CONFLICT(name) DO UPDATE SET root = excluded.root";
    self.conn.execute(upsert, params![name, root.as_str()])?;
    Ok(())
}
/// Returns the root hash the ref `name` points at, or `None` if there is no
/// ref with that name. Branches and checkpoints are looked up alike.
///
/// # Errors
/// `Error::Sqlite` if the query fails.
pub fn resolve(&self, name: &str) -> Result<Option<NodeHash>> {
    let found = self
        .conn
        .query_row(
            "SELECT root FROM refs WHERE name = ?1",
            params![name],
            |row| row.get::<_, String>(0),
        )
        .optional()?;
    Ok(found.map(NodeHash::from_raw))
}
/// Returns the `kind` column of the ref called `name` — `'branch'` or
/// `'checkpoint'`, per the table's CHECK constraint — or `None` if no such
/// ref exists.
///
/// # Errors
/// `Error::Sqlite` if the query fails.
fn ref_kind(&self, name: &str) -> Result<Option<String>> {
    let kind = self
        .conn
        .query_row(
            "SELECT kind FROM refs WHERE name = ?1",
            params![name],
            |row| row.get::<_, String>(0),
        )
        .optional()?;
    Ok(kind)
}
/// Expands the node stored under `hash` into an owned [`Materialized`] tree.
///
/// The node is loaded with `get`, and every child hash it holds is expanded
/// recursively, so the result contains no hashes at all. Payloads that are
/// not child references (literals, names, types, parameter lists, ...) are
/// copied across unchanged.
///
/// A node referenced from several places is loaded and expanded once per
/// reference; the returned tree does not share subtrees. Recursion depth
/// follows the depth of the stored tree.
///
/// # Errors
/// * `Error::MissingNode` if `hash`, or any hash reachable from it, is not
///   in the store (the error carries the missing hash);
/// * `Error::Sqlite` if a lookup fails;
/// * `Error::Decode` if a stored body does not deserialize.
pub fn materialize(&self, hash: &NodeHash) -> Result<Materialized> {
    let node = self
        .get(hash)?
        .ok_or_else(|| Error::MissingNode(hash.clone()))?;
    Ok(match node {
        Node::Lit(v) => Materialized::Lit(v),
        Node::FloatLit(b) => Materialized::FloatLit(b),
        Node::FloatOp { op, lhs, rhs } => Materialized::FloatOp {
            op,
            lhs: self.materialize_boxed(&lhs)?,
            rhs: self.materialize_boxed(&rhs)?,
        },
        Node::IntToFloat(a) => Materialized::IntToFloat(self.materialize_boxed(&a)?),
        Node::FloatToInt(a) => Materialized::FloatToInt(self.materialize_boxed(&a)?),
        Node::DecimalLit(v) => Materialized::DecimalLit(v),
        Node::DecimalOp { op, lhs, rhs } => Materialized::DecimalOp {
            op,
            lhs: self.materialize_boxed(&lhs)?,
            rhs: self.materialize_boxed(&rhs)?,
        },
        Node::IntToDecimal(a) => Materialized::IntToDecimal(self.materialize_boxed(&a)?),
        Node::DecimalRaw(a) => Materialized::DecimalRaw(self.materialize_boxed(&a)?),
        Node::DecimalToInt(a) => Materialized::DecimalToInt(self.materialize_boxed(&a)?),
        Node::Bool(b) => Materialized::Bool(b),
        Node::Not(a) => Materialized::Not(self.materialize_boxed(&a)?),
        Node::Str(s) => Materialized::Str(s),
        Node::StrLen(a) => Materialized::StrLen(self.materialize_boxed(&a)?),
        Node::StrLower(a) => Materialized::StrLower(self.materialize_boxed(&a)?),
        Node::StrFromCode(a) => Materialized::StrFromCode(self.materialize_boxed(&a)?),
        Node::StrConcat(a, b) => Materialized::StrConcat(
            self.materialize_boxed(&a)?,
            self.materialize_boxed(&b)?,
        ),
        Node::StrSlice { s, start, len } => Materialized::StrSlice {
            s: self.materialize_boxed(&s)?,
            start: self.materialize_boxed(&start)?,
            len: self.materialize_boxed(&len)?,
        },
        Node::StrEq(a, b) => Materialized::StrEq(
            self.materialize_boxed(&a)?,
            self.materialize_boxed(&b)?,
        ),
        Node::StrContains { haystack, needle } => Materialized::StrContains {
            haystack: self.materialize_boxed(&haystack)?,
            needle: self.materialize_boxed(&needle)?,
        },
        Node::StrStartsWith { s, prefix } => Materialized::StrStartsWith {
            s: self.materialize_boxed(&s)?,
            prefix: self.materialize_boxed(&prefix)?,
        },
        Node::StrIndexOf { haystack, needle } => Materialized::StrIndexOf {
            haystack: self.materialize_boxed(&haystack)?,
            needle: self.materialize_boxed(&needle)?,
        },
        Node::NumberToStr(a) => Materialized::NumberToStr(self.materialize_boxed(&a)?),
        Node::StrToNumber(a) => Materialized::StrToNumber(self.materialize_boxed(&a)?),
        Node::StrToNumberOpt(a) => {
            Materialized::StrToNumberOpt(self.materialize_boxed(&a)?)
        }
        Node::Now => Materialized::Now,
        Node::List(es) => Materialized::List(self.materialize_all(&es)?),
        Node::ListEmpty { elem } => Materialized::ListEmpty { elem },
        Node::ListCons { head, tail } => Materialized::ListCons {
            head: self.materialize_boxed(&head)?,
            tail: self.materialize_boxed(&tail)?,
        },
        Node::OptionSome(v) => Materialized::OptionSome(self.materialize_boxed(&v)?),
        Node::OptionNone { elem } => Materialized::OptionNone { elem },
        Node::OptionElse { opt, default } => Materialized::OptionElse {
            opt: self.materialize_boxed(&opt)?,
            default: self.materialize_boxed(&default)?,
        },
        Node::OptionMatch {
            opt,
            some_bind,
            some_body,
            none_body,
        } => Materialized::OptionMatch {
            opt: self.materialize_boxed(&opt)?,
            some_bind,
            some_body: self.materialize_boxed(&some_body)?,
            none_body: self.materialize_boxed(&none_body)?,
        },
        Node::ListTryGet { list, index } => Materialized::ListTryGet {
            list: self.materialize_boxed(&list)?,
            index: self.materialize_boxed(&index)?,
        },
        Node::ListLen(a) => Materialized::ListLen(self.materialize_boxed(&a)?),
        Node::ListGet { list, index } => Materialized::ListGet {
            list: self.materialize_boxed(&list)?,
            index: self.materialize_boxed(&index)?,
        },
        Node::Map(pairs) => {
            let mut entries = Vec::with_capacity(pairs.len());
            for (k, v) in pairs {
                entries.push((self.materialize(&k)?, self.materialize(&v)?));
            }
            Materialized::Map(entries)
        }
        Node::MapGet { map, key } => Materialized::MapGet {
            map: self.materialize_boxed(&map)?,
            key: self.materialize_boxed(&key)?,
        },
        Node::MapTryGet { map, key } => Materialized::MapTryGet {
            map: self.materialize_boxed(&map)?,
            key: self.materialize_boxed(&key)?,
        },
        Node::MapLen(a) => Materialized::MapLen(self.materialize_boxed(&a)?),
        Node::Log(a) => Materialized::Log(self.materialize_boxed(&a)?),
        Node::Publish(a) => Materialized::Publish(self.materialize_boxed(&a)?),
        Node::SetHeader { name, value } => Materialized::SetHeader {
            name: self.materialize_boxed(&name)?,
            value: self.materialize_boxed(&value)?,
        },
        Node::Rand => Materialized::Rand,
        Node::MutNew(v) => Materialized::MutNew(self.materialize_boxed(&v)?),
        Node::MutGet(c) => Materialized::MutGet(self.materialize_boxed(&c)?),
        Node::MutSet { cell, value } => Materialized::MutSet {
            cell: self.materialize_boxed(&cell)?,
            value: self.materialize_boxed(&value)?,
        },
        Node::DiskWrite { path, content } => Materialized::DiskWrite {
            path: self.materialize_boxed(&path)?,
            content: self.materialize_boxed(&content)?,
        },
        Node::DiskRead(p) => Materialized::DiskRead(self.materialize_boxed(&p)?),
        Node::NetGet(u) => Materialized::NetGet(self.materialize_boxed(&u)?),
        Node::DbQuery { sql, params } => Materialized::DbQuery {
            sql: self.materialize_boxed(&sql)?,
            // `params` here is the node's field, not the `params!` macro.
            params: self.materialize_boxed(&params)?,
        },
        Node::Ref(name) => Materialized::Ref(name),
        Node::Call { func, args } => Materialized::Call {
            func,
            args: self.materialize_all(&args)?,
        },
        Node::FuncRef(name) => Materialized::FuncRef(name),
        Node::CallValue { callee, args } => Materialized::CallValue {
            callee: self.materialize_boxed(&callee)?,
            args: self.materialize_all(&args)?,
        },
        Node::Lambda { params, body } => Materialized::Lambda {
            params,
            body: self.materialize_boxed(&body)?,
        },
        Node::Hole { expects } => Materialized::Hole { expects },
        Node::Step { binding, value } => Materialized::Step {
            binding,
            value: self.materialize_boxed(&value)?,
        },
        Node::BinOp { op, lhs, rhs } => Materialized::BinOp {
            op,
            lhs: self.materialize_boxed(&lhs)?,
            rhs: self.materialize_boxed(&rhs)?,
        },
        Node::If {
            cond,
            then_branch,
            else_branch,
        } => Materialized::If {
            cond: self.materialize_boxed(&cond)?,
            then_branch: self.materialize_boxed(&then_branch)?,
            else_branch: self.materialize_boxed(&else_branch)?,
        },
        Node::Fail(v) => Materialized::Fail(v),
        Node::Handle { body, handlers } => {
            // Handlers are expanded before the body; this fixes which error
            // surfaces first when several children are unavailable.
            let handlers = self.materialize_labelled(handlers)?;
            Materialized::Handle {
                body: self.materialize_boxed(&body)?,
                handlers,
            }
        }
        Node::Function {
            name,
            type_params,
            params,
            produces,
            requires,
            on_failure,
            body,
            result,
        } => Materialized::Function {
            name,
            type_params,
            params,
            produces,
            requires,
            on_failure,
            body: self.materialize_all(&body)?,
            result: self.materialize_boxed(&result)?,
        },
        Node::Module {
            name,
            types,
            functions,
        } => Materialized::Module {
            name,
            types: self.materialize_all(&types)?,
            functions: self.materialize_all(&functions)?,
        },
        Node::RecordDef { name, fields } => Materialized::RecordDef { name, fields },
        Node::Record { type_name, fields } => Materialized::Record {
            type_name,
            fields: self.materialize_labelled(fields)?,
        },
        Node::Field {
            base,
            type_name,
            field,
        } => Materialized::Field {
            base: self.materialize_boxed(&base)?,
            type_name,
            field,
        },
        Node::VariantDef { name, cases } => Materialized::VariantDef { name, cases },
        Node::Variant {
            type_name,
            case,
            fields,
        } => Materialized::Variant {
            type_name,
            case,
            fields: self.materialize_labelled(fields)?,
        },
        Node::Match {
            scrutinee,
            type_name,
            arms,
        } => {
            // Arms are expanded before the scrutinee, again to keep the
            // order in which errors surface fixed.
            let mut expanded = Vec::with_capacity(arms.len());
            for arm in arms {
                expanded.push((arm.case, arm.bindings, self.materialize(&arm.body)?));
            }
            Materialized::Match {
                scrutinee: self.materialize_boxed(&scrutinee)?,
                type_name,
                arms: expanded,
            }
        }
    })
}

/// Materializes the node under `hash` and boxes the result, for use in the
/// recursive `Box<Materialized>` fields.
fn materialize_boxed(&self, hash: &NodeHash) -> Result<Box<Materialized>> {
    self.materialize(hash).map(Box::new)
}

/// Materializes each hash in order, stopping at the first error.
fn materialize_all<'a, I>(&self, hashes: I) -> Result<Vec<Materialized>>
where
    I: IntoIterator<Item = &'a NodeHash>,
{
    hashes.into_iter().map(|h| self.materialize(h)).collect()
}

/// Materializes the hash half of each `(label, hash)` pair in order, keeping
/// the label and stopping at the first error.
fn materialize_labelled<I>(&self, entries: I) -> Result<Vec<(String, Materialized)>>
where
    I: IntoIterator<Item = (String, NodeHash)>,
{
    let entries = entries.into_iter();
    let mut out = Vec::with_capacity(entries.size_hint().0);
    for (label, hash) in entries {
        out.push((label, self.materialize(&hash)?));
    }
    Ok(out)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::node::{Param, Produces};
use crate::ty::{Confidence, Type};
use std::collections::BTreeSet;
/// Stores a small fixture in `store` and returns the hash of its root.
///
/// The fixture is a function `double(n)` whose single body step binds
/// `doubled` to `add(n, n)` and whose result is a reference to `doubled`.
/// The `n` reference node is used for both call arguments, so five distinct
/// nodes are stored in total.
fn build(store: &Store) -> NodeHash {
    let param_ref = store.put(&Node::Ref("n".into())).unwrap();

    let add_call = Node::Call {
        func: "add".into(),
        args: vec![param_ref.clone(), param_ref.clone()],
    };
    let add_call = store.put(&add_call).unwrap();

    let bind_doubled = Node::Step {
        binding: "doubled".into(),
        value: add_call,
    };
    let bind_doubled = store.put(&bind_doubled).unwrap();

    let result_ref = store.put(&Node::Ref("doubled".into())).unwrap();

    let function = Node::Function {
        name: "double".into(),
        type_params: vec![],
        params: vec![Param {
            name: "n".into(),
            ty: Type::Number,
            min_confidence: Confidence::External,
        }],
        produces: Produces {
            ty: Type::Number,
            confidence: Confidence::Structural,
        },
        requires: BTreeSet::new(),
        on_failure: vec![],
        body: vec![bind_doubled],
        result: result_ref,
    };
    store.put(&function).unwrap()
}
/// Building the same fixture in two independent stores must yield the same
/// root hash.
#[test]
fn hashing_is_deterministic_and_merkle() {
    let first = Store::open_in_memory().unwrap();
    let second = Store::open_in_memory().unwrap();
    let first_root = build(&first);
    let second_root = build(&second);
    assert_eq!(first_root, second_root);
}
/// The fixture puts the `n` reference once and uses its hash twice, so the
/// store must end up with exactly five rows.
#[test]
fn identical_subtrees_are_stored_once() {
    let store = Store::open_in_memory().unwrap();
    let _root = build(&store);
    let stored = store.node_count().unwrap();
    assert_eq!(stored, 5);
}
/// A checkpoint must resolve back to the exact root it was created with, and
/// materializing through it must give the same tree as before.
#[test]
fn checkpoint_round_trips_exactly() {
    let store = Store::open_in_memory().unwrap();
    let root = build(&store);
    let expected_tree = store.materialize(&root).unwrap();

    store.checkpoint("v1", &root).unwrap();

    let via_ref = store.resolve("v1").unwrap().expect("v1 resolves");
    assert_eq!(via_ref, root);
    let actual_tree = store.materialize(&via_ref).unwrap();
    assert_eq!(actual_tree, expected_tree);
}
/// Creating a checkpoint under a name that already exists must be rejected.
#[test]
fn checkpoints_are_immutable() {
    let store = Store::open_in_memory().unwrap();
    let root = build(&store);
    store.checkpoint("v1", &root).unwrap();

    let second_attempt = store.checkpoint("v1", &root);
    assert!(matches!(second_attempt, Err(Error::NameInUse(_))));
}
/// A branch can be repointed at a different root, but `branch` must refuse a
/// name that belongs to a checkpoint.
#[test]
fn branches_move_but_cannot_clobber_a_checkpoint() {
    let store = Store::open_in_memory().unwrap();
    let root = build(&store);
    let leaf = store.put(&Node::Lit(1)).unwrap();

    // Create the branch, then move it.
    store.branch("main", &root).unwrap();
    let initial = store.resolve("main").unwrap().unwrap();
    assert_eq!(initial, root);

    store.branch("main", &leaf).unwrap();
    let moved = store.resolve("main").unwrap().unwrap();
    assert_eq!(moved, leaf);

    // A checkpoint name is off limits to `branch`.
    store.checkpoint("release", &root).unwrap();
    let clobber = store.branch("release", &leaf);
    assert!(matches!(clobber, Err(Error::NameInUse(_))));
}
/// Nodes and checkpoints written to a file-backed store must still be there,
/// unchanged, after the store is closed and opened again.
#[test]
fn persists_across_reopen() {
    let path = std::env::temp_dir()
        .join(format!("cairn-store-test-{}.sqlite", std::process::id()));
    // Start from a clean slate; the file may not exist.
    let _ = std::fs::remove_file(&path);

    // First session: write the fixture and checkpoint it.
    let writer = Store::open(&path).unwrap();
    let root = build(&writer);
    writer.checkpoint("v1", &root).unwrap();
    drop(writer);

    // Second session: the checkpoint and the whole tree must come back.
    let reader = Store::open(&path).unwrap();
    let resolved = reader.resolve("v1").unwrap().expect("v1 survives reopen");
    assert_eq!(resolved, root);
    let from_disk = reader.materialize(&resolved).unwrap();
    // Reference tree: the same fixture built in a fresh in-memory store.
    let reference = Store::open_in_memory().unwrap();
    let from_memory = reference.materialize(&build(&reference)).unwrap();
    assert_eq!(from_disk, from_memory);
    drop(reader);

    std::fs::remove_file(&path).unwrap();
}
/// `Store::open` must accept a store of its own version, and refuse both a
/// store recording a different version and a store that holds nodes without
/// recording any version.
#[test]
fn refuses_a_foreign_or_unversioned_store() {
    let tmp = std::env::temp_dir();

    // Part 1: a store whose recorded version has been tampered with.
    let versioned = tmp.join(format!("cairn-fmt-test-{}.sqlite", std::process::id()));
    let _ = std::fs::remove_file(&versioned);

    let store = Store::open(&versioned).unwrap();
    store.put(&Node::Lit(1)).unwrap();
    drop(store);

    assert!(
        Store::open(&versioned).is_ok(),
        "same-version store must reopen"
    );

    let raw = Connection::open(&versioned).unwrap();
    raw.execute(
        "UPDATE meta SET value = '999' WHERE key = 'format_version'",
        [],
    )
    .unwrap();
    drop(raw);

    let reopened = Store::open(&versioned);
    match reopened {
        Err(Error::FormatMismatch { found, expected }) => {
            assert_eq!(found, "999");
            assert_eq!(expected, FORMAT_VERSION);
        }
        Err(e) => panic!("expected FormatMismatch, got {e:?}"),
        Ok(_) => panic!("expected FormatMismatch, store opened"),
    }
    std::fs::remove_file(&versioned).unwrap();

    // Part 2: a database that already has nodes but no `meta` table at all.
    let unversioned = tmp.join(format!("cairn-fmt-old-{}.sqlite", std::process::id()));
    let _ = std::fs::remove_file(&unversioned);

    let raw = Connection::open(&unversioned).unwrap();
    raw.execute_batch(
        "CREATE TABLE nodes (hash TEXT PRIMARY KEY, body BLOB NOT NULL);
INSERT INTO nodes (hash, body) VALUES ('h', x'00');",
    )
    .unwrap();
    drop(raw);

    assert!(
        matches!(Store::open(&unversioned), Err(Error::FormatMismatch { .. })),
        "an unversioned store holding nodes must be refused"
    );
    std::fs::remove_file(&unversioned).unwrap();
}
}