flowlog_build/lib.rs
1//! Build-script integration for FlowLog library mode.
2//!
3//! Compiles a `.dl` program into a Rust module your crate `include!`s
4//! from `build.rs`.
5//!
6//! # Minimal
7//!
8//! ```no_run
9//! // build.rs
10//! fn main() -> std::io::Result<()> {
11//! flowlog_build::compile("policy.dl")
12//! }
13//! ```
14//!
15//! ```ignore
16//! // src/lib.rs
17//! pub mod policy { include!(concat!(env!("OUT_DIR"), "/policy.rs")); }
18//!
19//! use policy::DatalogBatchEngine;
20//! let mut engine = DatalogBatchEngine::new(4);
21//! engine.insert_edge(vec![(1, 2), (2, 3)]);
22//! let results = engine.run();
23//! ```
24//!
25//! # Structured errors
26//!
27//! [`Builder::compile`] returns a [`BoxError`] for callers that want to
28//! render the diagnostic themselves rather than surface it through
29//! [`io::Error`]:
30//!
31//! ```no_run
32//! use flowlog_build::Builder;
33//!
34//! // build.rs
35//! if let Err(err) = Builder::default()
36//! .sip(true)
37//! .string_intern(true)
38//! .compile(&["policy.dl", "auth.dl"], &[] as &[&std::path::Path])
39//! {
40//! eprintln!("{err}");
41//! std::process::exit(1);
42//! }
43//! ```
44
45// Library-mode build flow (parse → stratify → plan → codegen → emit
46// `$OUT_DIR/<stem>.rs`). Binary mode (`flowlog-compiler`) bypasses this
47// and goes straight to `codegen`.
48mod build;
49
50// Shared codegen core — consumed by this crate's library mode and, via
51// the re-exports below, by `flowlog-compiler`'s binary mode.
52mod codegen;
53
54// Shared primitives — previously the `common` crate, folded in here.
55#[doc(hidden)]
56pub mod common;
57
58// Pipeline stages — previously independent crates, folded in here so
59// `flowlog-build` ships as a single publishable library.
60//
61// NOTE: These modules are `pub` because the `flowlog-compiler` binary
62// (separate crate, `publish = false`) and the integration tests under
63// `tests/` both reach into them. They are `#[doc(hidden)]` to signal
64// that they are NOT part of the stable public API — do not rely on
65// them from external crates.
66#[doc(hidden)]
67pub mod catalog;
68#[doc(hidden)]
69pub mod optimizer;
70#[doc(hidden)]
71pub mod parser;
72#[doc(hidden)]
73pub mod planner;
74#[doc(hidden)]
75pub mod profiler;
76#[doc(hidden)]
77pub mod stratifier;
78#[doc(hidden)]
79pub mod typechecker;
80
81pub use build::BuildError;
82
83// Internal codegen re-exports — only consumed by `flowlog-compiler`.
84// Hidden from docs.rs for the same reason as the pipeline modules above.
85#[doc(hidden)]
86pub use codegen::{
87 const_to_token, data_type_tokens, field_accessor, gen_drain_block, AggSemiringNeeds, CodeGen,
88 CodeParts, CodegenError, Features,
89};
90
91use std::io;
92use std::path::{Path, PathBuf};
93
94pub use crate::common::ExecutionMode;
95use crate::common::{emit, BoxError, SourceMap};
96
97/// Compile a single `.dl` program with default options.
98///
99/// Any pipeline diagnostic is rendered against its source map into the
100/// returned [`io::Error`]'s body, so `cargo build` shows a
101/// source-annotated message. For structured error access, use
102/// [`Builder::compile`].
103pub fn compile<P: AsRef<Path>>(program_path: P) -> io::Result<()> {
104 let out_dir = cargo_out_dir()?;
105 let mut sm = SourceMap::new();
106 Builder::default()
107 .compile_one(program_path.as_ref(), &out_dir, &mut sm)
108 .map_err(|err| {
109 let mut buf = Vec::new();
110 let _ = emit(&err, &sm, &mut buf);
111 io::Error::other(String::from_utf8_lossy(&buf).into_owned())
112 })
113}
114
115/// Chained configuration for advanced compilation options. For default
116/// settings prefer the free [`compile`] function.
117#[derive(Default)]
118pub struct Builder {
119 pub(crate) sip: bool,
120 pub(crate) string_intern: bool,
121 pub(crate) mode: ExecutionMode,
122 pub(crate) profile: bool,
123 pub(crate) include_dirs: Vec<PathBuf>,
124 pub(crate) udf_file: Option<PathBuf>,
125}
126
127impl Builder {
128 /// Enable Sideways Information Passing.
129 pub fn sip(mut self, enabled: bool) -> Self {
130 self.sip = enabled;
131 self
132 }
133
134 /// Enable string interning. User-facing tuple slots stay `String`;
135 /// interning is applied at `insert_<rel>` / drain.
136 pub fn string_intern(mut self, enabled: bool) -> Self {
137 self.string_intern = enabled;
138 self
139 }
140
141 /// Set the execution mode. Defaults to [`ExecutionMode::DatalogBatch`].
142 ///
143 /// Batch modes (`DatalogBatch`, `ExtendBatch`) emit a
144 /// `DatalogBatchEngine` with a single `run()` method. Incremental
145 /// modes (`DatalogInc`, `ExtendInc`) emit a
146 /// `DatalogIncrementalEngine` that maintains state across
147 /// `Transaction`-scoped commits.
148 pub fn mode(mut self, mode: ExecutionMode) -> Self {
149 self.mode = mode;
150 self
151 }
152
153 /// Path to the UDF source file, included as `mod udf` inside the
154 /// generated module. Generated code calls UDFs as `udf::<fn_name>(…)`.
155 pub fn udf_file(mut self, path: impl AsRef<Path>) -> Self {
156 self.udf_file = Some(path.as_ref().to_path_buf());
157 self
158 }
159
160 /// Enable operator-level profiling. When set:
161 /// - a static plan graph is written to `$OUT_DIR/log/ops.json` at build time;
162 /// - the generated engine registers timely + DD arrangement loggers
163 /// and writes `log/time/*.log` and `log/memory/*.log` cwd-relative
164 /// at runtime (batch: once at end; incremental: per commit).
165 ///
166 /// Not supported under `ExtendBatch` / `ExtendInc`; compilation
167 /// panics if the combination is requested.
168 pub fn profile(mut self, enabled: bool) -> Self {
169 self.profile = enabled;
170 self
171 }
172
173 /// Compile one or more `.dl` programs. Each input produces a
174 /// `<stem>.rs` file under `$OUT_DIR`.
175 ///
176 /// `include_dirs` is searched for `.include` directives after each
177 /// file's own directory. Builder settings apply to every input.
178 pub fn compile<P, I>(mut self, program_paths: &[P], include_dirs: &[I]) -> Result<(), BoxError>
179 where
180 P: AsRef<Path>,
181 I: AsRef<Path>,
182 {
183 self.include_dirs = include_dirs
184 .iter()
185 .map(|p| p.as_ref().to_path_buf())
186 .collect();
187
188 let out_dir = cargo_out_dir().map_err(BuildError::from)?;
189 for program_path in program_paths {
190 let mut sm = SourceMap::new();
191 self.compile_one(program_path.as_ref(), &out_dir, &mut sm)?;
192 }
193 Ok(())
194 }
195
196 /// Compile one `.dl` program, populating the caller's [`SourceMap`]
197 /// so any returned [`BoxError`] can be rendered against the source.
198 fn compile_one(
199 &self,
200 program_path: &Path,
201 out_dir: &Path,
202 sm: &mut SourceMap,
203 ) -> Result<(), BoxError> {
204 let stem = program_path
205 .file_stem()
206 .and_then(|s| s.to_str())
207 .ok_or_else(|| {
208 BuildError::from(io::Error::new(
209 io::ErrorKind::InvalidInput,
210 format!(
211 "program path has no usable file stem: {}",
212 program_path.display()
213 ),
214 ))
215 })?;
216
217 let output = build::Pipeline::build(self, program_path, sm)?;
218 let source = build::assemble(&output, out_dir).map_err(BuildError::from)?;
219 self.emit_semiring_modules(&output, out_dir)
220 .map_err(BuildError::from)?;
221 std::fs::write(out_dir.join(format!("{stem}.rs")), source).map_err(BuildError::from)?;
222 self.emit_rerun_if_changed(program_path);
223 Ok(())
224 }
225
226 /// Write aggregation-specific semiring modules to `$OUT_DIR/semiring/`.
227 ///
228 /// Library mode only has `flowlog-runtime` as a runtime dep, so we
229 /// prepend aliases that route `serde` / `ordered_float` /
230 /// `differential_dataflow` through `::flowlog_runtime::` — keeping
231 /// the templates mode-agnostic with binary mode.
232 fn emit_semiring_modules(&self, output: &build::Pipeline, out_dir: &Path) -> io::Result<()> {
233 if output.parts.semiring_modules.is_empty() {
234 return Ok(());
235 }
236 let semiring_dir = out_dir.join("semiring");
237 std::fs::create_dir_all(&semiring_dir)?;
238
239 const LIB_ALIASES: &str = "\
240use ::flowlog_runtime::serde;
241use ::flowlog_runtime::ordered_float;
242use ::flowlog_runtime::differential_dataflow;
243";
244
245 for (rel_path, content) in &output.parts.semiring_modules {
246 let fname = Path::new(rel_path)
247 .file_name()
248 .expect("semiring module path has no file name");
249 let dst = semiring_dir.join(fname);
250 if fname == "mod.rs" {
251 std::fs::write(dst, content)?;
252 } else {
253 std::fs::write(dst, format!("{LIB_ALIASES}{content}"))?;
254 }
255 }
256 Ok(())
257 }
258
259 /// Emit `cargo:rerun-if-changed` for the program, the UDF file, and
260 /// every include directory.
261 fn emit_rerun_if_changed(&self, program_path: &Path) {
262 println!("cargo:rerun-if-changed={}", program_path.display());
263 if let Some(udf) = &self.udf_file {
264 println!("cargo:rerun-if-changed={}", udf.display());
265 }
266 for inc in &self.include_dirs {
267 println!("cargo:rerun-if-changed={}", inc.display());
268 }
269 }
270}
271
/// Look up the `OUT_DIR` environment variable and return it as a path.
///
/// Yields a `NotFound` error when the variable is absent, which is the
/// case whenever the caller is not running inside a Cargo build script.
fn cargo_out_dir() -> io::Result<PathBuf> {
    match std::env::var_os("OUT_DIR") {
        Some(dir) => Ok(PathBuf::from(dir)),
        None => Err(io::Error::new(
            io::ErrorKind::NotFound,
            "OUT_DIR not set — run from a build.rs",
        )),
    }
}