flowlog-build 0.3.3

Build-time FlowLog compiler for library mode.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
//! Library-mode relation codegen.
//!
//! Emits:
//!
//! - Per-EDB `{Name}Input` structs with inherent methods (no `RelOps` trait,
//!   no dynamic dispatch) — see [`handler`].
//! - A concrete `Inputs` container the library engine holds directly.
//! - User-facing `rel::Foo` tuple aliases — see [`user`].
//!
//! Binary mode has its own relation codegen in `flowlog-compiler`.

mod handler;
pub(crate) mod user;

use std::collections::HashMap;
use std::io;

use proc_macro2::{Ident, TokenStream};
use quote::{format_ident, quote};

use crate::build::BuildError;
use crate::codegen::{CodegenError, Features};
use crate::parser::{Program, Relation};

/// Build the token stream for the body of the library-mode `relops` module.
///
/// The output is, in order: the `use` preamble, one input-handler struct
/// per EDB (produced by `handler::gen_input_struct`), and the `Inputs`
/// container.
///
/// # Errors
///
/// Returns the first error reported while generating a handler struct.
pub(crate) fn gen_input_module(
    program: &Program,
    features: &Features,
) -> Result<TokenStream, CodegenError> {
    let relations = program.edbs();
    let intern_strings = features.string_intern();

    let preamble = gen_preamble(program, features);

    // One handler per EDB; bail out on the first one that fails to generate.
    let mut handlers = Vec::new();
    for rel in relations.iter() {
        handlers.push(handler::gen_input_struct(
            rel,
            program.facts().get(rel.name()),
            intern_strings,
        )?);
    }

    let container = gen_inputs_container(&relations);

    Ok(quote! {
        #preamble
        #(#handlers)*
        #container
    })
}

// ------------------------------------------------------------
// Naming helpers — shared across relation codegen consumers
// ------------------------------------------------------------

/// Convert a relation name to `PascalCase`.
///
/// `_` and `-` act as word separators and are dropped; the first character
/// of every resulting word is upper-cased and the remainder of the word is
/// copied verbatim. Consecutive, leading, or trailing separators contribute
/// nothing, so `foo_bar` and `foo__bar` both become `FooBar`.
pub(crate) fn pascal_case(name: &str) -> String {
    name.split(|c: char| matches!(c, '_' | '-')).fold(
        String::with_capacity(name.len()),
        |mut acc, word| {
            let mut rest = word.chars();
            // Empty words (from adjacent separators) have no first char.
            if let Some(first) = rest.next() {
                acc.extend(first.to_uppercase());
                acc.push_str(rest.as_str());
            }
            acc
        },
    )
}

/// Name of a relation's field inside the engine-internal `Inputs` container.
///
/// The relation name is prefixed with `in_`, so the resulting ident is
/// never a bare Rust keyword. The container is `pub(crate)`, which keeps
/// the prefix out of the user API. Every `inputs.<field>` access in
/// `build/engine` has to use this same naming.
pub(crate) fn inputs_field_ident(rel: &Relation) -> Ident {
    let name = rel.name();
    format_ident!("in_{}", name)
}

/// Name of a relation's field on the user-facing results structs
/// (`BatchResults` / `IncrementalResults`) and their typed locals.
///
/// This is the canonical relation name with no prefix, suffix, or escaping
/// — the published API contract (`results.<name>`). Names that cannot be
/// represented this way are rejected up front by [`validate_api_surface`].
pub(crate) fn results_field_ident(rel: &Relation) -> Ident {
    let name = rel.name();
    format_ident!("{}", name)
}

/// Name of the results-struct field for a `.printsize` relation:
/// `<name>_size`.
///
/// This is the only place the `_size` suffix is spelled out, so the
/// generators and [`validate_api_surface`] always agree on it.
pub(crate) fn printsize_field_ident(rel: &Relation) -> Ident {
    let name = rel.name();
    format_ident!("{}_size", name)
}

/// Ident for the user-facing struct generated from a relation (e.g. `Edge`).
pub(crate) fn user_struct_ident(rel: &Relation) -> Ident {
    format_ident!("{}", pascal_case(rel.name()))
}

/// Ident for the engine-internal input-handler struct (e.g. `EdgeInput`).
pub(crate) fn input_struct_ident(rel: &Relation) -> Ident {
    format_ident!("{}Input", pascal_case(rel.name()))
}

// ------------------------------------------------------------
// API-surface validation
// ------------------------------------------------------------

/// Check that the library API for `program` can be generated faithfully,
/// and fail with an actionable message when it cannot.
///
/// The lib-mode API mirrors relation names without any escaping: results
/// fields use the canonical name verbatim and `rel::` aliases use its
/// PascalCase form. Two kinds of program are therefore rejected here,
/// rather than being left to fail in rustc inside generated code:
///
/// - a relation whose generated name is not a plain Rust ident (a keyword:
///   `.output Type` lowers to field `type`);
/// - two relations whose generated names coincide (`foo_bar` and
///   `foo__bar` both pascal-case to `FooBar`).
///
/// Keyword-named relations are otherwise fine — EDB surfaces are
/// prefixed/suffixed (`insert_type`, `TypeInput`, `type_size`), internal
/// bindings are synthetic, and binary mode has no API fields at all.
///
/// # Errors
///
/// Returns the first violation found, checking results fields, then
/// `rel::` aliases, then input-handler structs.
pub(crate) fn validate_api_surface(program: &Program) -> Result<(), BuildError> {
    // Results structs: `.output` fields and `.printsize` `<name>_size`
    // fields share one struct, hence one namespace.
    let mut result_fields: HashMap<String, String> = HashMap::new();
    for rel in program.output_idbs() {
        let (name, raw) = (rel.name(), rel.raw_name());
        ensure_plain_ident(name, raw, "a results field")?;
        ensure_unique(&mut result_fields, name.to_string(), raw, "results field")?;
    }
    for rel in program.printsize_idbs() {
        let size_field = printsize_field_ident(rel).to_string();
        ensure_unique(&mut result_fields, size_field, rel.raw_name(), "results field")?;
    }

    // `rel::` aliases: walk exactly the set the generator emits
    // ([`user::collect_user_rels`]) so validation and codegen stay aligned.
    let mut alias_owners: HashMap<String, String> = HashMap::new();
    for rel in user::collect_user_rels(program) {
        let raw = rel.raw_name();
        let alias = pascal_case(rel.name());
        ensure_plain_ident(&alias, raw, "a `rel::` type alias")?;
        ensure_unique(&mut alias_owners, alias, raw, "`rel::` type alias")?;
    }

    // `<Pascal>Input` structs: one handler struct per EDB (nullary
    // included), see [`handler`].
    let mut handler_owners: HashMap<String, String> = HashMap::new();
    for rel in program.edbs() {
        let struct_name = input_struct_ident(rel).to_string();
        ensure_unique(
            &mut handler_owners,
            struct_name,
            rel.raw_name(),
            "input-handler struct",
        )?;
    }

    Ok(())
}

/// `name` must be usable as a *plain* Rust ident (no `r#`, no escaping) —
/// `syn`'s ident parser is the authority, so there is no keyword list to
/// maintain.
fn ensure_plain_ident(name: &str, raw_name: &str, what: &str) -> Result<(), BuildError> {
    if syn::parse_str::<syn::Ident>(name).is_err() {
        return Err(BuildError::from(io::Error::new(
            io::ErrorKind::InvalidInput,
            format!(
                "relation `{raw_name}` cannot be exposed through the library API: \
                 `{name}` is not usable as {what} (it is a Rust keyword) — rename \
                 the relation, or drop its `.output`/`.printsize` directive"
            ),
        )));
    }
    Ok(())
}

/// Two distinct relations must never collapse onto one generated ident —
/// that would emit duplicate fields/aliases and fail in rustc with an
/// error pointing at generated code.
fn ensure_unique(
    owners: &mut HashMap<String, String>,
    ident: String,
    raw_name: &str,
    what: &str,
) -> Result<(), BuildError> {
    if let Some(prev) = owners.insert(ident.clone(), raw_name.to_string()) {
        return Err(BuildError::from(io::Error::new(
            io::ErrorKind::InvalidInput,
            format!(
                "relations `{prev}` and `{raw_name}` would both surface as the \
                 {what} `{ident}` in the generated library API — rename one of them"
            ),
        )));
    }
    Ok(())
}

// ------------------------------------------------------------
// Preamble
// ------------------------------------------------------------

/// Generate the `use` lines required by the emitted input-handler code.
///
/// `InputSession`, `Diff` and `Ts` are always imported. The remaining
/// imports are conditional:
///
/// - `SEMIRING_ONE` — when at least one EDB has an entry in the program's
///   facts;
/// - `intern` / `Spur` — when string interning is enabled in `features`;
/// - `OrderedFloat` — when at least one EDB has a float column.
fn gen_preamble(program: &Program, features: &Features) -> TokenStream {
    let facts = program.facts();
    let edbs = program.edbs();

    let uses_inline_facts = edbs.iter().any(|rel| facts.contains_key(rel.name()));
    let uses_floats = edbs
        .iter()
        .any(|rel| rel.data_type().iter().any(|dt| dt.is_float()));
    let uses_interning = features.string_intern();

    // Start from the unconditional imports and append the optional ones in
    // a fixed order.
    let mut preamble = quote! {
        use differential_dataflow::input::InputSession;

        use super::{Diff, Ts};
    };
    if uses_inline_facts {
        preamble.extend(quote! { use super::SEMIRING_ONE; });
    }
    if uses_interning {
        preamble.extend(quote! {
            use super::intern;
            use lasso::Spur;
        });
    }
    if uses_floats {
        preamble.extend(quote! { use ordered_float::OrderedFloat; });
    }
    preamble
}

// ------------------------------------------------------------
// `Inputs` container — one field per EDB, bulk-apply helpers
// ------------------------------------------------------------

fn gen_inputs_container(edbs: &[&Relation]) -> TokenStream {
    if edbs.is_empty() {
        return quote! {};
    }

    let fields: Vec<TokenStream> = edbs
        .iter()
        .map(|rel| {
            let f = inputs_field_ident(rel);
            let ty = input_struct_ident(rel);
            quote! { pub #f: #ty }
        })
        .collect();

    // `Inputs::new` takes each already-constructed `{Name}Input` by value
    // so the signature stays free of `InputSession` type parameters (which
    // would make it unwieldy to call from the engine).
    let fn_params: Vec<TokenStream> = edbs
        .iter()
        .map(|rel| {
            let p = format_ident!("h_{}", rel.name());
            let ty = input_struct_ident(rel);
            quote! { #p: #ty }
        })
        .collect();

    let inits: Vec<TokenStream> = edbs
        .iter()
        .map(|rel| {
            let f = inputs_field_ident(rel);
            let p = format_ident!("h_{}", rel.name());
            quote! { #f: #p }
        })
        .collect();

    let per_field = |method: TokenStream| -> Vec<TokenStream> {
        edbs.iter()
            .map(|rel| {
                let f = inputs_field_ident(rel);
                quote! { self.#f.#method; }
            })
            .collect()
    };
    let apply_inline = per_field(quote! { apply_inline(index) });
    let close = per_field(quote! { close() });
    let advance = per_field(quote! { advance_to(t) });
    let flush = per_field(quote! { flush() });

    quote! {
        /// Concrete container holding one input handler per EDB. The library
        /// engine owns this and calls typed methods directly on each field
        /// — no dynamic dispatch, no downcast.
        pub(crate) struct Inputs {
            #(#fields,)*
        }

        impl Inputs {
            pub fn new(#(#fn_params),*) -> Self {
                Self { #(#inits,)* }
            }

            pub fn apply_inline_all(&mut self, index: usize) {
                #(#apply_inline)*
            }

            pub fn close_all(&mut self) {
                #(#close)*
            }

            pub fn advance_to_all(&mut self, t: Ts) {
                #(#advance)*
            }

            pub fn flush_all(&mut self) {
                #(#flush)*
            }
        }
    }
}

#[cfg(test)]
mod api_surface_tests {
    //! Unit tests for the API-surface guards (`ensure_plain_ident`,
    //! `ensure_unique`) and the `pascal_case` hazards they exist to catch.

    use std::collections::HashMap;

    use super::{ensure_plain_ident, ensure_unique, pascal_case};

    /// Representative keywords across the strict / reserved / non-raw-able
    /// classes must be rejected as verbatim API field names. `syn`'s ident
    /// parser is the authority for the full set — no keyword list to
    /// maintain here.
    #[test]
    fn keywords_are_rejected_as_plain_idents() {
        for kw in [
            "type", "match", "in", "loop", "self", "Self", "crate", "super", "yield", "try",
        ] {
            assert!(
                ensure_plain_ident(kw, kw, "a results field").is_err(),
                "keyword {kw:?} should be rejected as a verbatim API ident"
            );
        }
    }

    /// Ordinary names — including the underscore-twins that used to collide
    /// under escape-based handling — are accepted verbatim.
    #[test]
    fn ordinary_names_are_accepted() {
        for name in ["varpointsto", "method_lookup", "crate_", "self_", "type_"] {
            assert!(
                ensure_plain_ident(name, name, "a results field").is_ok(),
                "name {name:?} should be accepted"
            );
        }
    }

    /// Distinct relations collapsing onto one ident are rejected with both
    /// owners named.
    #[test]
    fn duplicate_idents_are_rejected() {
        let mut owners = HashMap::new();
        ensure_unique(&mut owners, "x_size".into(), "x_size", "results field").unwrap();
        // `.printsize x` also wants the `x_size` field.
        let err = ensure_unique(&mut owners, "x_size".into(), "x", "results field").unwrap_err();
        let msg = err.to_string();
        // Match the backticked names: a bare `contains('x')` would be
        // satisfied by `x_size` alone and prove nothing about the second
        // owner being reported.
        assert!(msg.contains("relations `x_size` and `x`"), "{msg}");
        assert!(msg.contains("results field `x_size`"), "{msg}");
    }

    /// PascalCase collapses `foo_bar` / `foo__bar`, and turns `self` into
    /// the un-emittable `Self` — both must be caught by the same guards.
    #[test]
    fn pascal_namespace_hazards_are_caught() {
        assert_eq!(pascal_case("foo_bar"), pascal_case("foo__bar"));
        let mut owners = HashMap::new();
        ensure_unique(
            &mut owners,
            pascal_case("foo_bar"),
            "foo_bar",
            "`rel::` type alias",
        )
        .unwrap();
        assert!(
            ensure_unique(
                &mut owners,
                pascal_case("foo__bar"),
                "foo__bar",
                "`rel::` type alias"
            )
            .is_err()
        );
        assert_eq!(pascal_case("self"), "Self");
        assert!(ensure_plain_ident(&pascal_case("self"), "self", "a `rel::` type alias").is_err());
    }
}