1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
//! Command line argument parsing for FlowLog tools.
use clap::{Parser, ValueEnum};
use std::path::{Path, PathBuf};
use std::{fs, process};
/// Execution strategy for FlowLog workflows
// Maintainer notes:
// - `ValueEnum` (clap) lets these variants be used directly as the value of a
//   command-line option; `Config::mode` is declared with `value_enum` and
//   spells them in kebab-case (`datalog-batch`, `datalog-inc`,
//   `extend-batch`, `extend-inc`).
// - `Default` together with the `#[default]` marker below makes
//   `DatalogBatch` the value of `ExecutionMode::default()`.
// - The `///` comments on the variants are left untouched on purpose: clap's
//   derive may surface them as help text for the possible values.
#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum, Default)]
pub enum ExecutionMode {
/// Datalog single-pass batch execution.
/// Only tracks whether facts are present or absent,
/// making it suitable for high-performance static execution.
#[default]
DatalogBatch,
/// Datalog incremental execution.
/// Maintains state across updates, tracking how many times each fact
/// is derived, supporting incremental view maintenance.
DatalogInc,
/// Extended batch execution with explicit `loop` blocks.
/// Recursion is only allowed inside `loop` blocks; any recursive
/// dependency in plain rules is a hard error.
ExtendBatch,
/// Extended incremental execution with explicit `loop` blocks.
/// Combines incremental view maintenance with explicit loop control.
ExtendInc,
}
impl ExecutionMode {
    /// Returns `true` for the incremental variants (`DatalogInc`, `ExtendInc`).
    pub(crate) fn is_incremental(self) -> bool {
        // Exhaustive on purpose: a new variant must be classified explicitly.
        match self {
            Self::DatalogInc | Self::ExtendInc => true,
            Self::DatalogBatch | Self::ExtendBatch => false,
        }
    }

    /// Returns `true` for the batch variants (`DatalogBatch`, `ExtendBatch`).
    pub(crate) fn is_batch(self) -> bool {
        match self {
            Self::DatalogBatch | Self::ExtendBatch => true,
            Self::DatalogInc | Self::ExtendInc => false,
        }
    }
}
/// Command line arguments for FlowLog tools
// Maintainer notes:
// - `#[derive(Parser)]` is clap's derive API. With it, the `///` comments on
//   the fields below are used as each argument's help text, so rewording them
//   changes what `--help` prints. Use `//` comments for notes like this one.
// - `#[arg(long)]` without an explicit name derives the flag from the field
//   name (e.g. `fact_dir` -> `--fact-dir`, the spelling used in the panic
//   message of `Config::fact_dir_required`).
#[derive(Parser, Debug, Clone, Default)]
#[command(version, about, long_about = None)]
pub struct Config {
/// Path to the Datalog (.dl) program file
// No `short`/`long` is declared, so this is a positional argument.
#[arg(value_name = "PROGRAM")]
pub program: String,
/// Directory containing input fact files
#[arg(short = 'F', long, value_name = "DIR")]
pub fact_dir: Option<String>,
/// Path for the generated Rust executable
// Only the short flag `-o` is declared here; there is no long form.
#[arg(short = 'o', value_name = "PATH")]
pub executable_path: Option<String>,
/// Directory for writing output relations. Use `-` for stdout
#[arg(short = 'D', long, value_name = "DIR")]
pub output_dir: Option<String>,
/// Execution strategy: `datalog-batch` (default), `datalog-inc`,
/// `extend-batch`, or `extend-inc`.
/// Extended modes enable explicit `loop` blocks and forbid implicit recursion.
// The default is given as the command-line spelling of
// `ExecutionMode::DatalogBatch`; keep the two in sync.
#[arg(long, value_enum, default_value = "datalog-batch", value_name = "MODE")]
pub mode: ExecutionMode,
/// Collect per-rule execution statistics (timing, tuple counts)
// `Config::profiling_enabled` panics when this is combined with an extended
// mode.
#[arg(long, short = 'P')]
pub profile: bool,
/// Enable Sideways Information Passing to propagate binding constraints
/// from rule heads into body atoms, reducing intermediate results
#[arg(long)]
pub sip: bool,
/// Intern string columns as compact integer keys at load time for faster
/// joins, hashing, and lower memory usage. Recommended when the majority
/// of join keys are string-typed
#[arg(long)]
pub str_intern: bool,
/// Path to a Rust source file containing UDF implementations.
/// Functions declared with `.extern fn` in the Datalog
/// program must be defined in this file.
#[arg(long, value_name = "PATH")]
pub udf_file: Option<String>,
/// Keep the intermediate generated Rust crate instead of cleaning it up
/// after building the executable.
#[arg(long)]
pub save_temps: bool,
/// Extra search directory for `.include` directives. May be specified
/// multiple times. Includes are resolved by trying the parent file's
/// directory first, then each `-I` directory in order.
// The long name is set explicitly (`--include-dir`, singular) rather than
// derived from the plural field name.
#[arg(short = 'I', long = "include-dir", value_name = "DIR")]
pub include_dirs: Vec<String>,
}
impl Config {
    /// The program argument exactly as given on the command line.
    pub fn program(&self) -> &str {
        &self.program
    }

    /// Whether the program argument is the literal `all` or `--all`.
    pub fn should_process_all(&self) -> bool {
        self.program == "all" || self.program == "--all"
    }

    /// File stem of the program path (e.g. `dir/galen.dl` -> `galen`).
    ///
    /// Falls back to `unknown_program` when the path has no file stem or the
    /// stem is not valid UTF-8.
    pub fn program_name(&self) -> String {
        Path::new(&self.program)
            .file_stem()
            .and_then(|stem| stem.to_str())
            .map(|s| s.to_string())
            .unwrap_or_else(|| "unknown_program".into())
    }

    /// The configured fact directory, if any.
    pub fn fact_dir(&self) -> Option<&str> {
        self.fact_dir.as_deref()
    }

    /// Path of the executable to generate.
    ///
    /// Uses the `-o` value when present; otherwise a relative path named
    /// after [`Config::program_name`].
    pub fn executable_path(&self) -> PathBuf {
        match &self.executable_path {
            Some(path) => PathBuf::from(path),
            None => PathBuf::from(self.program_name()),
        }
    }

    /// Intermediate build directory for the generated Rust crate.
    /// Uses a hidden dotfile name (e.g., `.galen.build/`) so it won't collide
    /// with the final executable or any user files.
    ///
    /// The directory is a sibling of the executable: only the final path
    /// component is replaced.
    pub fn build_dir(&self) -> PathBuf {
        let exe = self.executable_path();
        let name = exe.file_name().and_then(|n| n.to_str()).unwrap_or("out");
        exe.with_file_name(format!(".{name}.build"))
    }

    /// Final path component of [`Config::executable_path`].
    ///
    /// Falls back to `out` when the path has no file name or the name is not
    /// valid UTF-8.
    pub fn executable_name(&self) -> String {
        self.executable_path()
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("out")
            .to_string()
    }

    /// Sanitized name suitable for use as a Cargo package/binary name.
    ///
    /// Every character other than an ASCII letter, ASCII digit, `_` or `-`
    /// is replaced with `_`. Cargo additionally requires the first character
    /// to be a letter or `_`, so a name that would start with a digit or a
    /// `-` is prefixed with `fl_`.
    pub fn crate_name(&self) -> String {
        let raw = self.executable_name();
        let mut s: String = raw
            .chars()
            .map(|c| {
                if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
                    c
                } else {
                    '_'
                }
            })
            .collect();
        // After sanitizing, the only first characters Cargo still rejects
        // are a digit and `-` (which is only legal after the first position).
        if s.starts_with(|c: char| c.is_ascii_digit() || c == '-') {
            s.insert_str(0, "fl_");
        }
        // Defensive fallback; `executable_name` never returns an empty name.
        if s.is_empty() {
            s = "out".to_string();
        }
        s
    }

    /// The configured output directory, if any (`-` denotes stdout).
    pub fn output_dir(&self) -> Option<&str> {
        self.output_dir.as_deref()
    }

    /// Whether the output directory was given as `-`, i.e. stdout.
    pub fn output_to_stdout(&self) -> bool {
        self.output_dir.as_deref() == Some("-")
    }

    /// The selected execution mode.
    pub fn mode(&self) -> ExecutionMode {
        self.mode
    }

    /// Whether the selected mode is one of the incremental modes.
    pub fn is_incremental(&self) -> bool {
        self.mode.is_incremental()
    }

    /// Whether the selected mode is one of the batch modes.
    pub fn is_batch(&self) -> bool {
        self.mode.is_batch()
    }

    /// Whether the mode is `DatalogBatch`. This is the only mode that uses
    /// `Present` diff; all other modes use `i32` diff for multiplicity tracking.
    pub fn is_datalog_batch(&self) -> bool {
        self.mode == ExecutionMode::DatalogBatch
    }

    /// Whether Extended Datalog mode is enabled (loop blocks allowed,
    /// implicit recursion forbidden).
    pub fn is_extended(&self) -> bool {
        matches!(
            self.mode,
            ExecutionMode::ExtendBatch | ExecutionMode::ExtendInc
        )
    }

    /// Returns the configured fact directory.
    ///
    /// # Panics
    ///
    /// Panics if no fact directory was supplied.
    pub fn fact_dir_required(&self) -> &str {
        self.fact_dir
            .as_deref()
            .expect("--fact-dir is required for this tool")
    }

    /// Returns the configured output directory.
    ///
    /// # Panics
    ///
    /// Panics if no output directory was supplied.
    pub fn output_dir_required(&self) -> &str {
        self.output_dir
            .as_deref()
            .expect("--output-dir is required for this tool")
    }

    /// Whether profiling instrumentation is enabled.
    ///
    /// # Panics
    ///
    /// Panics (via `unimplemented!`) when profiling is requested together
    /// with an extended mode, since that combination is not supported yet.
    pub fn profiling_enabled(&self) -> bool {
        if self.profile && self.is_extended() {
            unimplemented!("-P (profiling) is not yet supported with extended modes");
        }
        self.profile
    }

    /// Whether Sideways Information Passing (SIP) optimization is enabled.
    pub fn sip_enabled(&self) -> bool {
        self.sip
    }

    /// Whether string interning is enabled.
    pub fn str_intern_enabled(&self) -> bool {
        self.str_intern
    }

    /// Path to the user-supplied UDF implementation file, if any.
    pub fn udf_file(&self) -> Option<&str> {
        self.udf_file.as_deref()
    }

    /// Extra `.include` search directories collected from `-I` flags, in the
    /// order they are stored.
    pub fn include_dirs(&self) -> Vec<&Path> {
        self.include_dirs.iter().map(Path::new).collect()
    }

    /// Whether to keep the intermediate generated Rust crate.
    pub fn save_temps(&self) -> bool {
        self.save_temps
    }
}
/// Collects every `.dl` file found anywhere under the `example` directory
/// (relative to the current working directory) and returns them sorted.
///
/// Directories that cannot be read are reported on stderr and skipped, and
/// directory entries that fail to load are ignored. The process exits with
/// status 1 when `example` does not exist or when no `.dl` file is found.
pub fn get_example_files() -> Vec<std::path::PathBuf> {
    let example_dir = "example";

    // Bail out early when there is nothing to scan.
    if !Path::new(example_dir).exists() {
        eprintln!("Error: Directory '{}' not found", example_dir);
        process::exit(1);
    }

    // Depth-first walk driven by an explicit stack of directories to visit.
    let mut dl_files = Vec::new();
    let mut pending = vec![PathBuf::from(example_dir)];
    while let Some(current) = pending.pop() {
        match fs::read_dir(&current) {
            Err(err) => {
                eprintln!("Error reading dir '{}': {}", current.display(), err);
            }
            Ok(listing) => {
                for candidate in listing.flatten().map(|entry| entry.path()) {
                    // Directories win over the extension test, so a directory
                    // named `foo.dl` is descended into rather than collected.
                    if candidate.is_dir() {
                        pending.push(candidate);
                        continue;
                    }
                    let extension = candidate.extension().and_then(|ext| ext.to_str());
                    if extension == Some("dl") {
                        dl_files.push(candidate);
                    }
                }
            }
        }
    }

    dl_files.sort();

    if dl_files.is_empty() {
        eprintln!("No .dl files found in {}", example_dir);
        process::exit(1);
    }

    dl_files
}