1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
# NOTE(review): the table name and the key name are missing here; an empty
# `[]` header and a bare `= value` are not valid TOML. Presumably this is
# `[workspace]` with either `members` or `exclude` naming the `fuzz`
# directory — confirm against the upstream manifest before restoring.
[]
= ["fuzz"]
# IP Notice. The theoretical framework, formal constructions, and supervisory
# methods described herein constitute proprietary Background IP of Invariant
# Forge LLC (Delaware LLC No. 10529072), with prior art established by this
# publication and earlier Zenodo DOI publications by the same author.
# Commercial deployment requires a separate written license. Reference
# implementations are released under Apache 2.0.
# Licensing: licensing@invariantforge.net
# Crate metadata. Table and key names restored from the values they carry
# (Cargo's `[package]` schema); the values themselves are unchanged.
[package]
name = "dsfb-database"
version = "0.1.1"
edition = "2021"
rust-version = "1.74"
authors = ["Riaan de Beer <riaan@invariantforge.net>"]
# This manifest declares many binary targets, so plain `cargo run` needs a
# default. NOTE(review): key restored as `default-run` because it is the
# only `[package]` string field that takes a binary name — confirm.
default-run = "dsfb-database"
description = "DSFB-Database: deterministic, read-only structural observer for residual trajectories in SQL database telemetry. Empirical prior-art demonstration on Snowset, SQLShare, CEB, JOB, and TPC-DS."
license = "Apache-2.0"
readme = "README.md"
# The next two values are identical, so their relative order is immaterial.
repository = "https://github.com/infinityabundance/dsfb"
homepage = "https://github.com/infinityabundance/dsfb"
documentation = "https://docs.rs/dsfb-database"
keywords = ["dsfb", "database", "observability", "residual", "sql"]
categories = ["science", "command-line-utilities"]
# Allow-list of files shipped in the published crate; a leading `/`
# anchors the pattern at the package root.
include = [
    "src/**",
    "spec/**",
    "tests/**",
    "examples/**",
    "audit/**",
    "colab/dsfb_database_repro.ipynb",
    "/Cargo.toml",
    "/README.md",
    "/LICENSE",
    "/NOTICE",
    "/CITATION.cff",
]
# Phase-C7: library-mode consumers pay for core adapter + grammar +
# metrics + fingerprint machinery only. The `cli` feature adds `clap`
# for the bundled binaries; the `report` feature adds `plotters` +
# `serde_json` for PNG/JSON emission. `full` is the convenience
# superset users get from `cargo install dsfb-database --features full`.
# `cargo tree --depth 1` on default features reports ≤10 direct
# dependencies; `--features report` stays ≤13 (Phase-C DoD).
[features]
# Feature names restored from the comments in this section and from the
# member list of `full` below.
default = ["cli"]
# `clap` is only needed by the bundled binaries.
cli = ["dep:clap"]
# PNG / JSON emission.
report = ["dep:plotters", "dep:serde_json"]
# Phase-C1: OpenTelemetry DB-spans ingestor. Adds a JSON-array adapter
# for a simplified OTel DB-span shape that is forward-compatible with
# the OTLP/JSON export format used by `otel-collector` in 2026 (see
# `src/adapters/otel.rs` for the shape). Pulls `serde_json`.
otel = ["dep:serde_json"]
# Live read-only PostgreSQL telemetry adapter. Adds an async tokio
# current-thread runtime and tokio-postgres client for pulsed-scrape
# observation of pg_stat_statements / pg_stat_activity / pg_stat_io.
# Implies `report` because live mode emits episode CSVs and JSON
# sidecars. See `src/live/*` and `paper/dsfb-database.tex §Live
# read-only adapter` for the contract — determinism migrates to the
# tape artefact, not the live engine→tape path.
live-postgres = ["dep:tokio", "dep:tokio-postgres", "dep:futures-util", "report"]
# Live read-only MySQL telemetry adapter (second engine). Shares the
# three-layer contract documented on `live-postgres`, translated to
# `performance_schema` and `information_schema` surfaces. See
# `src/live_mysql/*` and `spec/permissions.mysql.sql`. The
# allow-list enum and query-text SHA-256 lock are unconditionally
# compiled (enum lives in library mode, see `src/live_mysql/queries.rs`);
# the runtime connection wrapper is feature-gated because it pulls
# `mysql_async` and its async TLS dependency tree.
live-mysql = ["dep:mysql_async", "dep:tokio", "dep:futures-util", "report"]
# Convenience superset of every optional feature above.
full = ["cli", "report", "otel", "live-postgres", "live-mysql"]
# NOTE(review): the table header and every key name in this section are
# missing; `[]` and a bare `= value` are not valid TOML, so Cargo cannot
# parse the file as it stands. Judging by the `dep:` references in the
# feature table this is presumably `[dependencies]`. Inside the inline
# tables the stripped keys are presumably `version`, `features`,
# `default-features` and `optional`. The crate names on the plain
# `= "x.y"` lines cannot be recovered from this file alone — restore them
# from the upstream manifest.
[]
= "0.1.2"
= "1"
# Presumably `clap` (optional, `derive`), matching `dep:clap` — confirm.
= { = "4", = ["derive"], = true }
= "1.3"
= { = "1", = ["derive"] }
= { = "1", = true }
= "0.9"
= { = "0.4", = false, = ["clock"] }
= "0.10"
= "0.8"
= "0.3"
# plotters: `ttf` feature is required because in plotters 0.3.7 the
# default `FontData::draw` stub panics with "The font implementation is
# unable to draw text" — see
# plotters-0.3.7/src/style/font/mod.rs:75. Dropping the ttf feature
# disables *all* caption / axis / annotation text rendering, which
# makes every PNG emitter in this crate unusable. We accept the
# font-kit dylib-resolution surface that PLUGIN-LOAD flags: the
# trade-off is in favour of producing the figures the paper
# cites. Phase-C7 feature-gates `plotters` behind the `report`
# feature so library-mode consumers can opt out.
= { = "0.3", = false, = ["bitmap_backend", "bitmap_encoder", "line_series", "ttf"], = true }
# Deterministic zip for the `reproduce-all` artefact bundle. We use the
# `stored` (no-compression) method with pinned entry metadata so the
# resulting archive is byte-stable across reruns of the same seed; a
# determinism test (`tests/reproduce_all_zip_is_deterministic.rs`) pins
# SHA-256 equality of two independent invocations. `default-features =
# false` drops zstd / bzip2 / openssl surface; we do not compress.
= { = "0.6", = false }
# Live PostgreSQL adapter dependencies (feature-gated behind
# `live-postgres`). Current-thread tokio runtime only — the live
# subcommand does not need multi-thread scheduling, and dropping
# `rt-multi-thread` keeps the default build's direct-dep count
# unchanged. `tokio-postgres` is the sfackler/rust-postgres async
# client; pinned to 0.7 (latest minor as of 2026-04).
# Presumably `tokio`, `tokio-postgres` and `futures-util`, in that order,
# matching the `live-postgres` feature list — confirm.
= { = "1", = ["rt", "time", "macros", "signal", "sync"], = true }
= { = "0.7", = true }
= { = "0.3", = true }
# Live MySQL adapter dependency (feature-gated behind `live-mysql`).
# `default-rustls` replaces the default native-tls backend with
# rustls; `rustls-tls` is strictly more auditable than native-tls
# for a paper that pins a three-layer code-audit contract. The dep
# is behind an optional flag so library-mode consumers pay nothing.
= { = "0.36", = false, = ["default-rustls"], = true }
# NOTE(review): the table header and every crate name in this section are
# missing; `[]` and a bare `= value` are not valid TOML. The comments
# describe test and bench tooling, so this is presumably
# `[dev-dependencies]` — confirm and restore the keys from the upstream
# manifest.
[]
# NOTE(review): no comment identifies this crate; the name cannot be
# recovered from this file.
= "3"
# Property-testing: `arbtest` is used in `tests/property_envelope_arbtest.rs`
# to cross-validate the kani proofs of `grammar::envelope::classify` with
# randomised shrinkable cases.
= "0.3"
# Concurrency exploration: `loom` is used in `tests/concurrent_stream_loom.rs`
# to verify that a cloned `ResidualStream` read from two threads is
# observationally equivalent to a single-threaded read. The crate itself
# is single-threaded; loom documents the absence of shared-state hazards.
= "0.7"
# Compile-fail harness for the read-only connection surface test
# (`tests/live_readonly_conn_surface.rs`) — asserts that calling
# `execute`, `prepare`, or `transaction` on a `ReadOnlyPgConn` fails
# to compile. The data-diode guarantee is type-level, so the test
# that pins it must be a build-time assertion.
# NOTE(review): the comment does not name the crate — confirm.
= "1"
# Pass-2 M5: Criterion microbenchmarks for the motif engine, the
# baseline detectors, and the live distiller. Used by the three
# bench targets under `benches/`, each of which sets `harness = false`
# so the bench layout is independent of the default test runner.
= "0.5"
# Library target. The crate name uses an underscore because the hyphen in
# the package name is not a valid Rust identifier character.
[lib]
name = "dsfb_database"
path = "src/lib.rs"
[[bin]]
name = "dsfb-database"
path = "src/main.rs"
# Main binary emits figures (plotters) and JSON sidecars (serde_json),
# so it needs both `cli` and `report`. Cargo skips the target when
# either feature is disabled.
required-features = ["cli", "report"]
# Phase-A1: multi-seed variance sweep. Runs the controlled TPC-DS
# perturbation pipeline across seeds 1..=N and reports mean/stddev/min/
# max for every per-motif metric. Produces artefacts outside the pinned
# fingerprint path (out/variance.csv); the single-seed fingerprint lock
# is untouched.
[[bin]]
name = "variance_sweep"
path = "src/bin/variance_sweep.rs"
# Built only when the `cli` feature is enabled.
required-features = ["cli"]
# Phase-A2: precision/recall/F1 sweep over the (drift, slew)
# thresholds. Emits one CSV + one PNG per motif, plus the baseline
# operating point marked on each figure. Produces artefacts outside the
# pinned fingerprint path (out/pr.<motif>.csv, out/pr.<motif>.png).
[[bin]]
name = "pr_sweep"
path = "src/bin/pr_sweep.rs"
# PR sweep emits PNG figures — requires the `report` feature in
# addition to `cli`.
required-features = ["cli", "report"]
# Phase-A3: false-alarm calibration on a Gaussian null trace. Runs the
# motif grammar on a pure-noise residual stream across a seed range and
# reports per-motif mean false-alarms-per-hour with a 95% CI. Produces
# artefacts outside the pinned fingerprint path (out/null.csv).
[[bin]]
name = "null_trace"
path = "src/bin/null_trace.rs"
# Built only when the `cli` feature is enabled.
required-features = ["cli"]
# Phase-A4: bake-off vs. published change-point baselines (ADWIN,
# BOCPD, PELT) on the same TPC-DS perturbation stream. Emits one CSV per
# motif comparing dsfb-database against each baseline under identical
# scoring. Produces artefacts outside the pinned fingerprint path
# (out/bakeoff.<motif>.csv).
[[bin]]
name = "baseline_bake_off"
path = "src/bin/baseline_bake_off.rs"
# Built only when the `cli` feature is enabled.
required-features = ["cli"]
# Phase-B1: inject one parametric perturbation onto each adapter's
# real-shaped exemplar (Snowset, SQLShare, CEB, JOB) and measure
# per-(carrier, motif, scale) detection latency and onset-localization
# error against the injected ground truth. Produces artefacts outside
# the pinned fingerprint path (out/inject_over_real.csv).
[[bin]]
name = "inject_over_real"
path = "src/bin/inject_over_real.rs"
# Built only when the `cli` feature is enabled.
required-features = ["cli"]
# Phase-B5: cost / overhead benchmark. Replicates the seed-42 TPC-DS
# perturbation stream up to the target residual count, times
# MotifEngine::run, and reports throughput, per-step mean latency,
# and peak resident set size. Produces artefacts outside the pinned
# fingerprint path (out/cost.csv).
[[bin]]
name = "ingest_throughput"
path = "src/bin/ingest_throughput.rs"
# Built only when the `cli` feature is enabled.
required-features = ["cli"]
# Phase-B4: one-at-a-time motif-parameter ablation. Sweeps each of
# the five MotifParams knobs independently per motif and reports per-
# (motif, parameter, probe) precision / recall / F1. Produces
# artefacts outside the pinned fingerprint path
# (out/ablation.<motif>.csv).
[[bin]]
name = "ablation_sweep"
path = "src/bin/ablation_sweep.rs"
# Built only when the `cli` feature is enabled.
required-features = ["cli"]
# Phase-B2: TPC-C generalization replay through the unchanged
# pg_stat_statements adapter. Synthesises a TPC-C-shaped snapshot CSV
# (a workload shape the envelope was never tuned against), plants two
# ground-truth perturbations (plan regression + workload-phase
# concentration), and reports per-motif episode counts plus
# localization against the planted windows. Produces artefacts outside
# the pinned fingerprint path (out/tpc_c_generalization.csv).
[[bin]]
name = "tpc_c_generalization"
path = "src/bin/tpc_c_generalization.rs"
# Built only when the `cli` feature is enabled.
required-features = ["cli"]
# Paper §Live figure regeneration. Deterministically synthesises a
# pg_stat_statements snapshot trajectory with a planted plan
# regression, drives it through the live DistillerState (the exact
# function called on every poll in live mode), and renders the
# three-panel pulsed-scrape figure. Writes the fixture CSVs to
# paper/fixtures/live_pg/ and the PNG to paper/figs/live_pulsed_scrape.png.
[[bin]]
name = "live_pulsed_scrape_figure"
path = "src/bin/live_pulsed_scrape_figure.rs"
# Built only when all three listed features are enabled.
required-features = ["cli", "report", "live-postgres"]
# Paper §Live Evaluation: replay a SHA-256-pinned residual tape through
# DSFB + ADWIN / BOCPD / PELT, score each against a ground-truth
# windows JSON, and emit a single apples-to-apples bakeoff CSV. The
# live-adapter analogue of the offline `baseline_bake_off` binary —
# same scoring (`metrics::evaluate`), same detectors (`baselines::*`),
# different input (a live-captured tape instead of the synthetic
# perturbation stream).
[[bin]]
name = "replay_tape_baselines"
path = "src/bin/replay_tape_baselines.rs"
# Built only when all three listed features are enabled.
required-features = ["cli", "report", "live-postgres"]
# Paper §Live Evaluation figure renderer. Consumes the two pinned
# tapes under paper/fixtures/live_pg_real/ plus the ground-truth JSON
# and produces two PNGs:
# * live_real_pg_trajectory.png — three-panel real-engine figure.
# * live_determinism_overlay.png — two-panel engine→tape /
# tape→episodes asymmetry figure.
# Output is a byte-deterministic function of the pinned fixtures.
[[bin]]
name = "render_live_eval_figures"
path = "src/bin/render_live_eval_figures.rs"
# Built only when all three listed features are enabled.
required-features = ["cli", "report", "live-postgres"]
# Paper §Live-Eval baseline-tuning sweep. Picks the best macro-F1
# hyperparameter config for each of ADWIN/BOCPD/PELT on a held-out
# training replication (default rep01 of every fault), freezes the
# config, and evaluates it on the remaining replications. DSFB is
# evaluated at defaults (not re-tuned) so the comparison is
# "baselines at best training-split config" vs. "DSFB as published".
# See paper/tables/baseline_tuned.tex + §Held-Out Baseline Tuning.
[[bin]]
name = "baseline_tune"
path = "src/bin/baseline_tune.rs"
# Built only when all three listed features are enabled.
required-features = ["cli", "report", "live-postgres"]
# Paper §Public-Trace Bake-Off. Evaluates all four detectors on the
# publicly-cited offline traces (Snowset, SQLShare, CEB, JOB) using
# the same scoring pipeline as the live tape bake-off. Because those
# traces are not fault-annotated, every emitted episode counts as an
# FP; we report per-detector FAR/hr with 95 % bootstrap CI as a
# workload-stress *upper bound* on false-alarm rate, not a
# detection-quality claim. See paper/tables/public_trace_far.tex.
[[bin]]
name = "public_trace_bakeoff"
path = "src/bin/public_trace_bakeoff.rs"
# Built only when both `cli` and `report` are enabled.
required-features = ["cli", "report"]
# Pass-2 N4: Monte-Carlo coverage of the percentile-bootstrap 95 % CI
# at small sample sizes. The §Live-Eval table reports CIs at n=10 and
# the Pass-2 statistics reviewer asked for an empirical coverage curve
# rather than just the literature caveat. Output is a CSV that the
# paper's bootstrap-coverage figure renders directly. Pure synthetic
# Monte Carlo — no engine touched, no fixture mutated.
[[bin]]
name = "bootstrap_coverage"
path = "src/bin/bootstrap_coverage.rs"
# Built only when the `cli` feature is enabled.
required-features = ["cli"]
# Pass-2 M5: Criterion microbenchmarks. Each [[bench]] entry is a
# standalone target with `harness = false` so Criterion's main()
# replaces the default test harness. They live under benches/ and
# are run by `cargo bench`.
[[bench]]
name = "motif_engine"
path = "benches/motif_engine.rs"
# Criterion supplies its own `main()`, so the default harness is disabled.
harness = false
required-features = ["cli"]
[[bench]]
name = "baselines"
path = "benches/baselines.rs"
# Criterion supplies its own `main()`, so the default harness is disabled.
harness = false
required-features = ["cli"]
[[bench]]
name = "live_distiller"
path = "benches/live_distiller.rs"
# Criterion supplies its own `main()`, so the default harness is disabled.
harness = false
required-features = ["cli", "live-postgres"]
# Build-profile tuning: maximum optimisation plus thin LTO.
# NOTE(review): the header is restored as `[profile.release]`, the usual
# home for an `opt-level` / `lto` pair; confirm it was not another
# profile (e.g. `bench`).
[profile.release]
opt-level = 3
lto = "thin"