use crate::runtime::store::{FieldSchema, MoleculeSchema, TypeRegistry};
use crate::syntax::ast::*;
use crate::value::{MoleculeKindId, PrimitiveType, Type};
use crate::Error;
use std::collections::{BTreeMap, BTreeSet};
use std::sync::Arc;
/// A program that has gone through every pass in `run_passes`, bundled with
/// the lookup tables built during verification.
///
/// The program and registry are held behind `Arc`, so the derived `Clone`
/// shares them instead of copying them; the two eligibility maps are cloned
/// by value.
#[derive(Clone)]
pub struct VerifiedProgram {
/// The program as handed to `run_passes`.
pub program: Arc<Program>,
/// Registry holding the built-in molecule schemas plus the ones declared
/// by `program`.
pub type_registry: Arc<TypeRegistry>,
/// For each molecule kind, the `(reaction index, when-pattern position)`
/// pairs of every `when` pattern that names that kind.
pub when_eligibility: BTreeMap<MoleculeKindId, Vec<(usize, usize)>>,
/// For each molecule kind, the indices of the reactions whose `rollup`
/// clause names that kind.
pub rollup_eligibility: BTreeMap<MoleculeKindId, Vec<usize>>,
}
impl VerifiedProgram {
pub fn type_registry(&self) -> &Arc<TypeRegistry> {
&self.type_registry
}
pub fn reactions(&self) -> &[ReactionDecl] {
&self.program.reactions
}
pub fn reactions_for(&self, kind: MoleculeKindId) -> &[(usize, usize)] {
self.when_eligibility
.get(&kind)
.map(|v| v.as_slice())
.unwrap_or(&[])
}
pub fn rollup_reactions_for(&self, kind: MoleculeKindId) -> &[usize] {
self.rollup_eligibility
.get(&kind)
.map(|v| v.as_slice())
.unwrap_or(&[])
}
}
/// Verifies `program` and packages it as a [`VerifiedProgram`].
///
/// Steps, in order:
/// 1. create a fresh [`TypeRegistry`] and register the built-in molecules,
/// 2. register the molecules declared by `program` (resolving mixins),
/// 3. run `pass_typecheck`, `pass_coverage`, `pass_no_self_join` and
///    `pass_no_cycle`,
/// 4. build the `when` / `rollup` eligibility tables.
///
/// # Errors
///
/// Returns the first error produced by user-molecule registration, by any
/// of the passes, or by the eligibility build; later steps are not run.
pub fn run_passes(program: Program) -> Result<VerifiedProgram, Error> {
    let registry = Arc::new(TypeRegistry::new());

    // `&registry` is a `&Arc<TypeRegistry>`; deref coercion turns it into the
    // `&TypeRegistry` that the registration and pass functions take.
    register_builtins(&registry);
    register_user_molecules(&registry, &program)?;

    pass_typecheck(&program, &registry)?;
    pass_coverage(&program, &registry)?;
    pass_no_self_join(&program, &registry)?;
    pass_no_cycle(&program, &registry)?;

    let (when_eligibility, rollup_eligibility) = build_eligibility(&program, &registry)?;

    Ok(VerifiedProgram {
        program: Arc::new(program),
        type_registry: registry,
        when_eligibility,
        rollup_eligibility,
    })
}
fn register_builtins(registry: &TypeRegistry) {
registry.register(MoleculeSchema {
name: "Timer".into(),
fields: vec![FieldSchema {
name: "interval".into(),
ty: Type::Primitive(PrimitiveType::Duration),
default: None,
}],
primary_key: vec![],
merge: None,
is_singleton: true,
});
registry.register(MoleculeSchema {
name: "Boot".into(),
fields: vec![FieldSchema {
name: "ts".into(),
ty: Type::Primitive(PrimitiveType::Timestamp),
default: Some(Expr::Call("now".into(), vec![])),
}],
primary_key: vec![],
merge: None,
is_singleton: true,
});
registry.register(MoleculeSchema {
name: "Tick".into(),
fields: vec![
FieldSchema {
name: "sequence".into(),
ty: Type::Primitive(PrimitiveType::Int),
default: None,
},
FieldSchema {
name: "ts".into(),
ty: Type::Primitive(PrimitiveType::Timestamp),
default: None,
},
],
primary_key: vec!["sequence".into()],
merge: None,
is_singleton: false,
});
registry.register(MoleculeSchema {
name: "LlmCall".into(),
fields: vec![
FieldSchema {
name: "id".into(),
ty: Type::Primitive(PrimitiveType::Uuid),
default: Some(Expr::Call("uuid".into(), vec![])),
},
FieldSchema {
name: "provider".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString("mock".into())),
},
FieldSchema {
name: "model".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString("default".into())),
},
FieldSchema {
name: "prompt".into(),
ty: Type::Primitive(PrimitiveType::String),
default: None,
},
FieldSchema {
name: "temperature".into(),
ty: Type::Primitive(PrimitiveType::Float),
default: Some(Expr::LitFloat(0.2)),
},
FieldSchema {
name: "max_tokens".into(),
ty: Type::Primitive(PrimitiveType::Int),
default: Some(Expr::LitInt(1024)),
},
FieldSchema {
name: "status".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString("Pending".into())),
},
FieldSchema {
name: "text".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString(String::new())),
},
FieldSchema {
name: "finish_reason".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString(String::new())),
},
],
primary_key: vec!["id".into()],
merge: Some(MergeFn {
old_binding: "old".into(),
new_binding: "new".into(),
body: Expr::Ident("new".into()),
}),
is_singleton: false,
});
registry.register(MoleculeSchema {
name: "Process".into(),
fields: vec![
FieldSchema {
name: "id".into(),
ty: Type::Primitive(PrimitiveType::Uuid),
default: Some(Expr::Call("uuid".into(), vec![])),
},
FieldSchema {
name: "cmd".into(),
ty: Type::Primitive(PrimitiveType::String),
default: None,
},
FieldSchema {
name: "args".into(),
ty: Type::List(Box::new(Type::Primitive(PrimitiveType::String))),
default: Some(Expr::ListLit(vec![])),
},
FieldSchema {
name: "status".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString("Pending".into())),
},
FieldSchema {
name: "stdout".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString(String::new())),
},
FieldSchema {
name: "stderr".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString(String::new())),
},
FieldSchema {
name: "exit_code".into(),
ty: Type::Primitive(PrimitiveType::Int),
default: Some(Expr::LitInt(0)),
},
],
primary_key: vec!["id".into()],
merge: Some(MergeFn {
old_binding: "old".into(),
new_binding: "new".into(),
body: Expr::Ident("new".into()),
}),
is_singleton: false,
});
registry.register(MoleculeSchema {
name: "LlmProvider".into(),
fields: vec![
FieldSchema {
name: "name".into(),
ty: Type::Primitive(PrimitiveType::String),
default: None,
},
FieldSchema {
name: "kind".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString("openai_compat".into())),
},
FieldSchema {
name: "base_url".into(),
ty: Type::Optional(Box::new(Type::Primitive(PrimitiveType::String))),
default: Some(Expr::LitNull),
},
FieldSchema {
name: "token_file".into(),
ty: Type::Optional(Box::new(Type::Primitive(PrimitiveType::String))),
default: Some(Expr::LitNull),
},
FieldSchema {
name: "token_jq".into(),
ty: Type::Optional(Box::new(Type::Primitive(PrimitiveType::String))),
default: Some(Expr::LitNull),
},
],
primary_key: vec!["name".into()],
merge: Some(MergeFn {
old_binding: "old".into(),
new_binding: "new".into(),
body: Expr::Ident("new".into()),
}),
is_singleton: false,
});
registry.register(MoleculeSchema {
name: "TerminalPrompt".into(),
fields: vec![
FieldSchema {
name: "id".into(),
ty: Type::Primitive(PrimitiveType::Uuid),
default: Some(Expr::Call("uuid".into(), vec![])),
},
FieldSchema {
name: "question".into(),
ty: Type::Primitive(PrimitiveType::String),
default: None,
},
FieldSchema {
name: "status".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString("Pending".into())),
},
FieldSchema {
name: "answer".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString(String::new())),
},
],
primary_key: vec!["id".into()],
merge: Some(MergeFn {
old_binding: "old".into(),
new_binding: "new".into(),
body: Expr::Ident("new".into()),
}),
is_singleton: false,
});
registry.register(MoleculeSchema {
name: "Spinner".into(),
fields: vec![
FieldSchema {
name: "label".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString(String::new())),
},
FieldSchema {
name: "running".into(),
ty: Type::Primitive(PrimitiveType::Bool),
default: Some(Expr::LitBool(false)),
},
FieldSchema {
name: "status".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString("Pending".into())),
},
],
primary_key: vec![],
merge: Some(MergeFn {
old_binding: "old".into(),
new_binding: "new".into(),
body: Expr::Ident("new".into()),
}),
is_singleton: true,
});
registry.register(MoleculeSchema {
name: "TerminalWrite".into(),
fields: vec![
FieldSchema {
name: "id".into(),
ty: Type::Primitive(PrimitiveType::Uuid),
default: Some(Expr::Call("uuid".into(), vec![])),
},
FieldSchema {
name: "stream".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString("stdout".into())),
},
FieldSchema {
name: "content".into(),
ty: Type::Primitive(PrimitiveType::String),
default: None,
},
FieldSchema {
name: "newline".into(),
ty: Type::Primitive(PrimitiveType::Bool),
default: Some(Expr::LitBool(true)),
},
FieldSchema {
name: "markdown".into(),
ty: Type::Primitive(PrimitiveType::Bool),
default: Some(Expr::LitBool(false)),
},
FieldSchema {
name: "status".into(),
ty: Type::Primitive(PrimitiveType::String),
default: Some(Expr::LitString("Pending".into())),
},
],
primary_key: vec!["id".into()],
merge: Some(MergeFn {
old_binding: "old".into(),
new_binding: "new".into(),
body: Expr::Ident("new".into()),
}),
is_singleton: false,
});
}
fn register_user_molecules(
registry: &TypeRegistry,
program: &Program,
) -> Result<(), Error> {
let mixins_by_name: std::collections::HashMap<&str, &MixinDecl> = program
.mixins
.iter()
.map(|m| (m.name.as_str(), m))
.collect();
for decl in &program.molecules {
let mut fields: Vec<FieldDecl> = Vec::new();
let mut inherited_pk: Option<Vec<String>> = None;
let mut inherited_merge: Option<MergeFn> = None;
for mixin_name in &decl.mixins {
let mixin = mixins_by_name.get(mixin_name.as_str()).ok_or_else(|| {
Error::Verify(format!(
"molecule `{}` references unknown mixin `{}`",
decl.name, mixin_name
))
})?;
for f in &mixin.fields {
if fields.iter().any(|x| x.name == f.name) {
return Err(Error::Verify(format!(
"molecule `{}`: field `{}` from mixin `{}` collides with an earlier mixin",
decl.name, f.name, mixin_name
)));
}
fields.push(f.clone());
}
if let Some(pk) = &mixin.primary_key {
if let Some(existing) = &inherited_pk {
if existing != pk {
return Err(Error::Verify(format!(
"molecule `{}`: conflicting primary_key from multiple mixins",
decl.name
)));
}
} else {
inherited_pk = Some(pk.clone());
}
}
if let Some(m) = &mixin.merge {
if inherited_merge.is_some() {
return Err(Error::Verify(format!(
"molecule `{}`: multiple mixins provide a merge clause",
decl.name
)));
}
inherited_merge = Some(m.clone());
}
}
for f in &decl.fields {
fields.retain(|x| x.name != f.name);
fields.push(f.clone());
}
let primary_key = match decl.primary_key.clone().or(inherited_pk) {
Some(pk) => pk,
None => {
return Err(Error::Verify(format!(
"molecule `{}` missing primary_key (not declared and no mixin provides one)",
decl.name
)))
}
};
let merge = decl.merge.clone().or(inherited_merge);
let schema_fields: Vec<FieldSchema> = fields
.iter()
.map(|f| FieldSchema {
name: f.name.clone(),
ty: type_expr_to_type(&f.ty),
default: f.default.clone(),
})
.collect();
let is_singleton = primary_key.is_empty();
registry.register(MoleculeSchema {
name: decl.name.clone(),
fields: schema_fields,
primary_key,
merge,
is_singleton,
});
}
Ok(())
}
/// Lowers a syntactic `TypeExpr` to a runtime `Type`.
///
/// Primitives map one-to-one, `List` / `Optional` are lowered recursively,
/// and every named type becomes `Type::Enum` carrying that name.
fn type_expr_to_type(ty: &TypeExpr) -> Type {
    match ty {
        TypeExpr::Primitive(primitive) => Type::Primitive(*primitive),
        TypeExpr::Named(name) => Type::Enum(name.clone()),
        TypeExpr::List(element) => {
            let element_ty = type_expr_to_type(element);
            Type::List(Box::new(element_ty))
        }
        TypeExpr::Optional(wrapped) => {
            let wrapped_ty = type_expr_to_type(wrapped);
            Type::Optional(Box::new(wrapped_ty))
        }
    }
}
/// Checks that every `emit` targets a registered molecule and only assigns
/// fields that the molecule's schema declares.
///
/// Only field *names* are checked here; the assigned expressions are not
/// inspected.
///
/// # Errors
///
/// Returns `Error::Verify` for the first emit naming an unknown molecule or
/// an unknown field.
fn pass_typecheck(program: &Program, registry: &TypeRegistry) -> Result<(), Error> {
    for reaction in &program.reactions {
        for emit in &reaction.emit {
            let schema = match registry.schema_by_name(&emit.molecule_name) {
                Some(schema) => schema,
                None => {
                    return Err(Error::Verify(format!(
                        "reaction {} emits unknown molecule {}",
                        reaction.name, emit.molecule_name
                    )));
                }
            };

            let mut declared: BTreeSet<&str> = BTreeSet::new();
            for field in schema.fields.iter() {
                declared.insert(field.name.as_str());
            }

            for assignment in &emit.fields {
                if declared.contains(assignment.name.as_str()) {
                    continue;
                }
                return Err(Error::Verify(format!(
                    "reaction {} emits {}.{} but molecule has no such field",
                    reaction.name, emit.molecule_name, assignment.name
                )));
            }
        }
    }
    Ok(())
}
/// Checks that every molecule named by a `when` pattern or an `emit` is
/// known to `registry`.
///
/// Within a reaction, all `when` patterns are checked before any `emit`.
///
/// # Errors
///
/// Returns `Error::Verify` naming the first reaction and unknown molecule
/// encountered.
fn pass_coverage(program: &Program, registry: &TypeRegistry) -> Result<(), Error> {
    for reaction in &program.reactions {
        let unknown_pattern = reaction
            .when
            .iter()
            .find(|pat| registry.id_by_name(&pat.molecule_name).is_none());
        if let Some(pat) = unknown_pattern {
            return Err(Error::Verify(format!(
                "reaction {} when-pattern references unknown molecule {}",
                reaction.name, pat.molecule_name
            )));
        }

        for emit in &reaction.emit {
            if registry.id_by_name(&emit.molecule_name).is_some() {
                continue;
            }
            return Err(Error::Verify(format!(
                "reaction {} emit references unknown molecule {}",
                reaction.name, emit.molecule_name
            )));
        }
    }
    Ok(())
}
/// Molecule names that `pass_no_cycle` skips as edge *sources* when it builds
/// the reaction graph: a reaction triggered by one of these kinds adds no
/// outgoing edge from that kind, so it cannot close a cycle through it.
///
/// NOTE(review): presumably these are the kinds whose results come back from
/// external effect handlers rather than from reactions — confirm.
const EFFECT_KINDS: &[&str] = &[
"TerminalWrite",
"TerminalPrompt",
"Process",
"LlmCall",
];
/// Rejects programs whose reactions form a cycle between molecule kinds.
///
/// A directed edge `source -> target` is added for every reaction that
/// consumes `source` (through a `when` pattern or its `rollup`) and emits
/// `target`, except when `source` is listed in `EFFECT_KINDS`. The graph is
/// then walked depth-first with the usual white/gray/black colouring; hitting
/// a gray node means the current path loops back on itself.
///
/// # Errors
///
/// Returns `Error::Verify` if a referenced molecule is unknown to `registry`,
/// or if a cycle exists (the message lists the kinds on the cycle, joined by
/// `" -> "`, ending with the kind that closes the loop).
fn pass_no_cycle(program: &Program, registry: &TypeRegistry) -> Result<(), Error> {
    use std::collections::{BTreeMap, BTreeSet};

    /// Visit state of a node during the depth-first walk.
    #[derive(Clone, Copy, PartialEq)]
    enum Color {
        White,
        Gray,
        Black,
    }

    /// Visits `node` and everything reachable from it; `path` holds the
    /// nodes currently on the recursion stack.
    fn dfs(
        node: MoleculeKindId,
        edges: &BTreeMap<MoleculeKindId, BTreeSet<MoleculeKindId>>,
        color: &mut BTreeMap<MoleculeKindId, Color>,
        name_of: &BTreeMap<MoleculeKindId, String>,
        path: &mut Vec<MoleculeKindId>,
    ) -> Result<(), Error> {
        color.insert(node, Color::Gray);
        path.push(node);

        for &next in edges.get(&node).into_iter().flatten() {
            // Nodes that only ever appear as targets have no colour entry yet.
            let state = color.get(&next).copied().unwrap_or(Color::White);
            match state {
                Color::Black => {}
                Color::White => dfs(next, edges, color, name_of, path)?,
                Color::Gray => {
                    // The cycle is the tail of `path` starting at `next`,
                    // closed by `next` itself.
                    let chain: Vec<&str> = path
                        .iter()
                        .skip_while(|n| **n != next)
                        .chain(std::iter::once(&next))
                        .filter_map(|n| name_of.get(n).map(String::as_str))
                        .collect();
                    return Err(Error::Verify(format!(
                        "reaction graph has a cycle: {}",
                        chain.join(" -> ")
                    )));
                }
            }
        }

        path.pop();
        color.insert(node, Color::Black);
        Ok(())
    }

    let mut edges: BTreeMap<MoleculeKindId, BTreeSet<MoleculeKindId>> = BTreeMap::new();
    let mut name_of: BTreeMap<MoleculeKindId, String> = BTreeMap::new();

    for reaction in &program.reactions {
        let mut sources: Vec<(MoleculeKindId, String)> = Vec::new();

        for pat in &reaction.when {
            let id = match registry.id_by_name(&pat.molecule_name) {
                Some(id) => id,
                None => {
                    return Err(Error::Verify(format!(
                        "unknown molecule {}",
                        pat.molecule_name
                    )));
                }
            };
            sources.push((id, pat.molecule_name.clone()));
            name_of
                .entry(id)
                .or_insert_with(|| pat.molecule_name.clone());
        }

        if let Some(rollup) = &reaction.rollup {
            let id = match registry.id_by_name(&rollup.molecule_name) {
                Some(id) => id,
                None => {
                    return Err(Error::Verify(format!(
                        "unknown rollup molecule {}",
                        rollup.molecule_name
                    )));
                }
            };
            sources.push((id, rollup.molecule_name.clone()));
            name_of
                .entry(id)
                .or_insert_with(|| rollup.molecule_name.clone());
        }

        for emit in &reaction.emit {
            let target = match registry.id_by_name(&emit.molecule_name) {
                Some(id) => id,
                None => {
                    return Err(Error::Verify(format!(
                        "unknown emit molecule {}",
                        emit.molecule_name
                    )));
                }
            };
            name_of
                .entry(target)
                .or_insert_with(|| emit.molecule_name.clone());

            let graph_sources = sources
                .iter()
                .filter(|(_, name)| !EFFECT_KINDS.contains(&name.as_str()));
            for (source_id, _) in graph_sources {
                edges.entry(*source_id).or_default().insert(target);
            }
        }
    }

    let mut color: BTreeMap<MoleculeKindId, Color> =
        edges.keys().map(|&kind| (kind, Color::White)).collect();

    for &root in edges.keys() {
        let unvisited = color.get(&root).copied().unwrap_or(Color::White) == Color::White;
        if unvisited {
            dfs(root, &edges, &mut color, &name_of, &mut Vec::new())?;
        }
    }
    Ok(())
}
/// Rejects reactions whose `when` clause names the same molecule kind more
/// than once.
///
/// # Errors
///
/// Returns `Error::Verify` if a `when` pattern names an unknown molecule, or
/// if a kind is repeated within one reaction's `when` clause.
fn pass_no_self_join(program: &Program, registry: &TypeRegistry) -> Result<(), Error> {
    use std::collections::HashSet;

    for reaction in &program.reactions {
        let mut kinds_in_when: HashSet<MoleculeKindId> = HashSet::new();

        for pat in &reaction.when {
            let id = match registry.id_by_name(&pat.molecule_name) {
                Some(id) => id,
                None => {
                    return Err(Error::Verify(format!(
                        "unknown molecule {}",
                        pat.molecule_name
                    )));
                }
            };

            // `insert` reports `false` when the kind was already present.
            let first_mention = kinds_in_when.insert(id);
            if first_mention {
                continue;
            }
            return Err(Error::Verify(format!(
                "reaction `{}` mentions {} more than once in `when` (self-joins are deferred to a later plan)",
                reaction.name, pat.molecule_name
            )));
        }
    }
    Ok(())
}
/// The pair of lookup tables returned by `build_eligibility`.
///
/// * `.0` maps a molecule kind to `(reaction index, when-pattern position)`
///   pairs.
/// * `.1` maps a molecule kind to the indices of reactions that roll it up.
type Eligibility = (
BTreeMap<MoleculeKindId, Vec<(usize, usize)>>,
BTreeMap<MoleculeKindId, Vec<usize>>,
);
fn build_eligibility(program: &Program, registry: &TypeRegistry) -> Result<Eligibility, Error> {
let mut when_map: BTreeMap<MoleculeKindId, Vec<(usize, usize)>> = BTreeMap::new();
let mut rollup_map: BTreeMap<MoleculeKindId, Vec<usize>> = BTreeMap::new();
for (ridx, reaction) in program.reactions.iter().enumerate() {
for (pos, pat) in reaction.when.iter().enumerate() {
let id = registry
.id_by_name(&pat.molecule_name)
.ok_or_else(|| Error::Verify(format!("unknown {}", pat.molecule_name)))?;
when_map.entry(id).or_default().push((ridx, pos));
}
if let Some(rollup) = &reaction.rollup {
let id = registry.id_by_name(&rollup.molecule_name).ok_or_else(|| {
Error::Verify(format!(
"unknown rollup molecule {}",
rollup.molecule_name
))
})?;
rollup_map.entry(id).or_default().push(ridx);
}
}
Ok((when_map, rollup_map))
}