Skip to main content

flowlog_build/common/
config.rs

1//! Command line argument parsing for FlowLog tools.
2
3use clap::{Parser, ValueEnum};
4use std::path::{Path, PathBuf};
5use std::{fs, process};
6
7/// Execution strategy for FlowLog workflows
8#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum, Default)]
9pub enum ExecutionMode {
10    /// Datalog single-pass batch execution.
11    /// Only tracks whether facts are present or absent,
12    /// making it suitable for high-performance static execution.
13    #[default]
14    DatalogBatch,
15    /// Datalog incremental execution.
16    /// Maintains state across updates, tracking how many times each fact
17    /// is derived, supporting incremental view maintenance.
18    DatalogInc,
19    /// Extended batch execution with explicit `loop` blocks.
20    /// Recursion is only allowed inside `loop` blocks; any recursive
21    /// dependency in plain rules is a hard error.
22    ExtendBatch,
23    /// Extended incremental execution with explicit `loop` blocks.
24    /// Combines incremental view maintenance with explicit loop control.
25    ExtendInc,
26}
27
28impl ExecutionMode {
29    pub(crate) fn is_incremental(self) -> bool {
30        matches!(self, Self::DatalogInc | Self::ExtendInc)
31    }
32
33    pub(crate) fn is_batch(self) -> bool {
34        matches!(self, Self::DatalogBatch | Self::ExtendBatch)
35    }
36}
37
38/// Command line arguments for FlowLog tools
39#[derive(Parser, Debug, Clone, Default)]
40#[command(version, about, long_about = None)]
41pub struct Config {
42    /// Path to the Datalog (.dl) program file
43    #[arg(value_name = "PROGRAM")]
44    pub program: String,
45
46    /// Directory containing input fact files
47    #[arg(short = 'F', long, value_name = "DIR")]
48    pub fact_dir: Option<String>,
49
50    /// Path for the generated Rust executable
51    #[arg(short = 'o', value_name = "PATH")]
52    pub executable_path: Option<String>,
53
54    /// Directory for writing output relations. Use `-` for stdout
55    #[arg(short = 'D', long, value_name = "DIR")]
56    pub output_dir: Option<String>,
57
58    /// Execution strategy: `datalog-batch` (default), `datalog-inc`,
59    /// `extend-batch`, or `extend-inc`.
60    /// Extended modes enable explicit `loop` blocks and forbid implicit recursion.
61    #[arg(long, value_enum, default_value = "datalog-batch", value_name = "MODE")]
62    pub mode: ExecutionMode,
63
64    /// Collect per-rule execution statistics (timing, tuple counts)
65    #[arg(long, short = 'P')]
66    pub profile: bool,
67
68    /// Enable Sideways Information Passing to propagate binding constraints
69    /// from rule heads into body atoms, reducing intermediate results
70    #[arg(long)]
71    pub sip: bool,
72
73    /// Intern string columns as compact integer keys at load time for faster
74    /// joins, hashing, and lower memory usage. Recommended when the majority
75    /// of join keys are string-typed
76    #[arg(long)]
77    pub str_intern: bool,
78
79    /// Path to a Rust source file containing UDF implementations.
80    /// Functions declared with `.extern fn` in the Datalog
81    /// program must be defined in this file.
82    #[arg(long, value_name = "PATH")]
83    pub udf_file: Option<String>,
84
85    /// Keep the intermediate generated Rust crate instead of cleaning it up
86    /// after building the executable.
87    #[arg(long)]
88    pub save_temps: bool,
89
90    /// Extra search directory for `.include` directives. May be specified
91    /// multiple times. Includes are resolved by trying the parent file's
92    /// directory first, then each `-I` directory in order.
93    #[arg(short = 'I', long = "include-dir", value_name = "DIR")]
94    pub include_dirs: Vec<String>,
95}
96
97impl Config {
98    pub fn program(&self) -> &str {
99        &self.program
100    }
101
102    pub fn should_process_all(&self) -> bool {
103        self.program == "all" || self.program == "--all"
104    }
105
106    pub fn program_name(&self) -> String {
107        Path::new(&self.program)
108            .file_stem()
109            .and_then(|stem| stem.to_str())
110            .map(|s| s.to_string())
111            .unwrap_or_else(|| "unknown_program".into())
112    }
113
114    pub fn fact_dir(&self) -> Option<&str> {
115        self.fact_dir.as_deref()
116    }
117
118    pub fn executable_path(&self) -> PathBuf {
119        self.executable_path
120            .as_ref()
121            .map(PathBuf::from)
122            .unwrap_or_else(|| PathBuf::from(self.program_name()))
123    }
124
125    /// Intermediate build directory for the generated Rust crate.
126    /// Uses a hidden dotfile name (e.g., `.galen.build/`) so it won't collide
127    /// with the final executable or any user files.
128    pub fn build_dir(&self) -> PathBuf {
129        let exe = self.executable_path();
130        let name = exe.file_name().and_then(|n| n.to_str()).unwrap_or("out");
131        exe.with_file_name(format!(".{name}.build"))
132    }
133
134    pub fn executable_name(&self) -> String {
135        self.executable_path()
136            .file_name()
137            .and_then(|name| name.to_str())
138            .unwrap_or("out")
139            .to_string()
140    }
141
142    /// Sanitized name suitable for use as a Cargo package/binary name.
143    /// Replaces characters that Cargo rejects (dots, spaces, etc.) with
144    /// underscores and ensures the result doesn't start with a digit.
145    pub fn crate_name(&self) -> String {
146        let raw = self.executable_name();
147        let mut s: String = raw
148            .chars()
149            .map(|c| {
150                if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
151                    c
152                } else {
153                    '_'
154                }
155            })
156            .collect();
157        // Cargo rejects names starting with a digit.
158        if s.starts_with(|c: char| c.is_ascii_digit()) {
159            s.insert_str(0, "fl_");
160        }
161        if s.is_empty() {
162            s = "out".to_string();
163        }
164        s
165    }
166
167    pub fn output_dir(&self) -> Option<&str> {
168        self.output_dir.as_deref()
169    }
170
171    pub fn output_to_stdout(&self) -> bool {
172        self.output_dir.as_deref() == Some("-")
173    }
174
175    pub fn mode(&self) -> ExecutionMode {
176        self.mode
177    }
178
179    pub fn is_incremental(&self) -> bool {
180        self.mode.is_incremental()
181    }
182
183    pub fn is_batch(&self) -> bool {
184        self.mode.is_batch()
185    }
186
187    /// Whether the mode is `DatalogBatch`. This is the only mode that uses
188    /// `Present` diff; all other modes use `i32` diff for multiplicity tracking.
189    pub fn is_datalog_batch(&self) -> bool {
190        self.mode == ExecutionMode::DatalogBatch
191    }
192
193    /// Whether Extended Datalog mode is enabled (loop blocks allowed,
194    /// implicit recursion forbidden).
195    pub fn is_extended(&self) -> bool {
196        matches!(
197            self.mode,
198            ExecutionMode::ExtendBatch | ExecutionMode::ExtendInc
199        )
200    }
201
202    /// Returns the configured fact directory, panicking if unset.
203    pub fn fact_dir_required(&self) -> &str {
204        self.fact_dir
205            .as_ref()
206            .expect("--fact-dir is required for this tool")
207    }
208
209    /// Returns the configured output directory, panicking if unset.
210    pub fn output_dir_required(&self) -> &str {
211        self.output_dir
212            .as_ref()
213            .expect("--output-dir is required for this tool")
214    }
215
216    /// Whether profiling instrumentation is enabled.
217    pub fn profiling_enabled(&self) -> bool {
218        if self.profile && self.is_extended() {
219            unimplemented!("-P (profiling) is not yet supported with extended modes");
220        }
221        self.profile
222    }
223
224    /// Whether Sideways Information Passing (SIP) optimization is enabled.
225    pub fn sip_enabled(&self) -> bool {
226        self.sip
227    }
228
229    /// Whether string interning is enabled.
230    pub fn str_intern_enabled(&self) -> bool {
231        self.str_intern
232    }
233
234    /// Path to the user-supplied UDF implementation file, if any.
235    pub fn udf_file(&self) -> Option<&str> {
236        self.udf_file.as_deref()
237    }
238
239    /// Extra `.include` search directories collected from `-I` flags.
240    pub fn include_dirs(&self) -> Vec<&Path> {
241        self.include_dirs.iter().map(Path::new).collect()
242    }
243
244    /// Whether to keep the intermediate generated Rust crate.
245    pub fn save_temps(&self) -> bool {
246        self.save_temps
247    }
248}
249
/// Collects every `.dl` file found anywhere under `example/` (resolved
/// against the current working directory) and returns the paths sorted.
///
/// Directories that cannot be listed are reported on stderr and skipped;
/// individual entries that fail to read are skipped silently. The process
/// exits with status 1 when `example/` does not exist or when no `.dl`
/// file is found.
pub fn get_example_files() -> Vec<std::path::PathBuf> {
    let example_dir = "example";
    let root = PathBuf::from(example_dir);

    // Bail out early when there is nothing to scan.
    if !root.exists() {
        eprintln!("Error: Directory '{}' not found", example_dir);
        process::exit(1);
    }

    // Depth-first walk driven by an explicit stack of directories to visit.
    let mut found: Vec<PathBuf> = Vec::new();
    let mut pending: Vec<PathBuf> = vec![root];
    while let Some(current) = pending.pop() {
        let listing = match fs::read_dir(&current) {
            Ok(listing) => listing,
            Err(err) => {
                eprintln!("Error reading dir '{}': {}", current.display(), err);
                continue;
            }
        };

        for candidate in listing.filter_map(Result::ok).map(|item| item.path()) {
            if candidate.is_dir() {
                pending.push(candidate);
                continue;
            }
            let is_datalog = candidate
                .extension()
                .and_then(|ext| ext.to_str())
                .map_or(false, |ext| ext == "dl");
            if is_datalog {
                found.push(candidate);
            }
        }
    }

    // Traversal order depends on the filesystem, so normalise it.
    found.sort();

    if found.is_empty() {
        eprintln!("No .dl files found in {}", example_dir);
        process::exit(1);
    }

    found
}