# dsfb-database 0.1.1
#
# DSFB-Database: deterministic, read-only structural observer for residual trajectories in SQL database telemetry. Empirical prior-art demonstration on Snowset, SQLShare, CEB, JOB, and TPC-DS.
# Documentation: https://docs.rs/dsfb-database
# Makes this manifest its own workspace root and keeps the `fuzz`
# directory out of that workspace, so it is not treated as a member.
# NOTE(review): `fuzz` is presumably a cargo-fuzz crate with its own
# manifest — confirm.
[workspace]
exclude = ["fuzz"]

# IP Notice. The theoretical framework, formal constructions, and supervisory
# methods described herein constitute proprietary Background IP of Invariant
# Forge LLC (Delaware LLC No. 10529072), with prior art established by this
# publication and earlier Zenodo DOI publications by the same author.
# Commercial deployment requires a separate written license. Reference
# implementations are released under Apache 2.0.
# Licensing: licensing@invariantforge.net

[package]
name = "dsfb-database"
version = "0.1.1"
edition = "2021"
# Minimum Rust toolchain Cargo will accept for building this crate.
rust-version = "1.74"
authors = ["Riaan de Beer <riaan@invariantforge.net>"]
# Target selected by a bare `cargo run`; matches the `[[bin]]` named
# `dsfb-database` declared later in this manifest.
default-run = "dsfb-database"
description = "DSFB-Database: deterministic, read-only structural observer for residual trajectories in SQL database telemetry. Empirical prior-art demonstration on Snowset, SQLShare, CEB, JOB, and TPC-DS."
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/infinityabundance/dsfb"
homepage = "https://github.com/infinityabundance/dsfb"
documentation = "https://docs.rs/dsfb-database"
keywords = ["dsfb", "database", "observability", "residual", "sql"]
categories = ["science", "command-line-utilities"]
# Allow-list of files that `cargo package` / `cargo publish` put into
# the `.crate` archive; anything not matched here is left out.
# `benches/**` is listed alongside `tests/**` and `examples/**` so the
# `[[bench]]` targets declared in this manifest keep their source
# files in the published package.
include = [
    "src/**",
    "spec/**",
    "tests/**",
    "examples/**",
    "benches/**",
    "audit/**",
    "colab/dsfb_database_repro.ipynb",
    "/Cargo.toml",
    "/README.md",
    "/LICENSE",
    "/NOTICE",
    "/CITATION.cff",
]

# Phase-C7: library-mode consumers pay for core adapter + grammar +
# metrics + fingerprint machinery only. The `cli` feature adds `clap`
# for the bundled binaries; the `report` feature adds `plotters` +
# `serde_json` for PNG/JSON emission. `full` is the convenience
# superset users get from `cargo install dsfb-database --features full`.
# `cargo tree --depth 1` on default features reports ≤10 direct
# dependencies; `--features report` stays ≤13 (Phase-C DoD).
# NOTE(review): `[dependencies]` lists 10 unconditional crates and
# `default = ["cli"]` additionally enables `clap`, so the ≤10 figure
# appears to describe `--no-default-features` (library mode) rather
# than the default feature set — confirm against `cargo tree`.
[features]
# The default build enables only the CLI layer.
default = ["cli"]
# Turns on the optional `clap` dependency.
cli = ["dep:clap"]
# Turns on the optional `plotters` and `serde_json` dependencies.
report = ["dep:plotters", "dep:serde_json"]
# Phase-C1: OpenTelemetry DB-spans ingestor. Adds a JSON-array adapter
# for a simplified OTel DB-span shape that is forward-compatible with
# the OTLP/JSON export format used by `otel-collector` in 2026 (see
# `src/adapters/otel.rs` for the shape). Pulls `serde_json`.
otel = ["dep:serde_json"]
# Live read-only PostgreSQL telemetry adapter. Adds an async tokio
# current-thread runtime and tokio-postgres client for pulsed-scrape
# observation of pg_stat_statements / pg_stat_activity / pg_stat_io.
# Implies `report` because live mode emits episode CSVs and JSON
# sidecars. See `src/live/*` and `paper/dsfb-database.tex §Live
# read-only adapter` for the contract — determinism migrates to the
# tape artefact, not the live engine→tape path.
live-postgres = ["dep:tokio", "dep:tokio-postgres", "dep:futures-util", "report"]
# Live read-only MySQL telemetry adapter (second engine). Shares the
# three-layer contract documented on `live-postgres`, translated to
# `performance_schema` and `information_schema` surfaces. See
# `src/live_mysql/*` and `spec/permissions.mysql.sql`. The
# allow-list enum and query-text SHA-256 lock are unconditionally
# compiled (enum lives in library mode, see `src/live_mysql/queries.rs`);
# the runtime connection wrapper is feature-gated because it pulls
# `mysql_async` and its async TLS dependency tree.
live-mysql = ["dep:mysql_async", "dep:tokio", "dep:futures-util", "report"]
# Convenience superset: enables every other named feature in this table.
full = ["cli", "report", "otel", "live-postgres", "live-mysql"]

[dependencies]
dsfb = "0.1.2"
anyhow = "1"
# Optional: enabled by the `cli` feature.
clap = { version = "4", features = ["derive"], optional = true }
csv = "1.3"
serde = { version = "1", features = ["derive"] }
# Optional: enabled by the `report` or `otel` feature.
serde_json = { version = "1", optional = true }
serde_yaml = "0.9"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
sha2 = "0.10"
rand = "0.8"
rand_pcg = "0.3"
# plotters: `ttf` feature is required because in plotters 0.3.7 the
# default `FontData::draw` stub panics with "The font implementation is
# unable to draw text" — see
# plotters-0.3.7/src/style/font/mod.rs:75. Dropping the ttf feature
# disables *all* caption / axis / annotation text rendering, which
# makes every PNG emitter in this crate unusable. We accept the
# font-kit dylib-resolution surface that PLUGIN-LOAD flags: the
# trade-off is honestly in favour of producing the figures the paper
# cites. Phase-C7 feature-gates `plotters` behind the `report`
# feature so library-mode consumers can opt out.
plotters = { version = "0.3", default-features = false, features = ["bitmap_backend", "bitmap_encoder", "line_series", "ttf"], optional = true }
# Deterministic zip for the `reproduce-all` artefact bundle. We use the
# `stored` (no-compression) store-mode with pinned entry metadata so the
# resulting archive is byte-stable across reruns of the same seed; a
# determinism test (`tests/reproduce_all_zip_is_deterministic.rs`) pins
# SHA-256 equality of two independent invocations. `default-features =
# false` drops zstd / bzip2 / openssl surface; we do not compress.
# NOTE(review): zip 0.6's default feature set is believed to be
# aes-crypto / bzip2 / deflate / time / zstd, i.e. pure-Rust AES rather
# than an OpenSSL binding — confirm and adjust the wording above.
zip = { version = "0.6", default-features = false }
# Live PostgreSQL adapter dependencies (feature-gated behind
# `live-postgres`). Current-thread tokio runtime only — the live
# subcommand does not need multi-thread scheduling, and dropping
# `rt-multi-thread` keeps the default build's direct-dep count
# unchanged. `tokio-postgres` is the sfackler/rust-postgres async
# client; pinned to 0.7 (latest minor as of 2026-04).
# `tokio` and `futures-util` are also enabled by `live-mysql`.
tokio = { version = "1", features = ["rt", "time", "macros", "signal", "sync"], optional = true }
tokio-postgres = { version = "0.7", optional = true }
futures-util = { version = "0.3", optional = true }
# Live MySQL adapter dependency (feature-gated behind `live-mysql`).
# The `default-rustls` feature selects rustls in place of the default
# native-tls backend; rustls is strictly more auditable than native-tls
# for a paper that pins a three-layer code-audit contract. The dep
# is optional so library-mode consumers pay nothing.
mysql_async = { version = "0.36", default-features = false, features = ["default-rustls"], optional = true }

[dev-dependencies]
tempfile = "3"
# Property-testing: `arbtest` is used in `tests/property_envelope_arbtest.rs`
# to cross-validate the kani proofs of `grammar::envelope::classify` with
# randomised shrinkable cases.
arbtest = "0.3"
# Concurrency exploration: `loom` is used in `tests/concurrent_stream_loom.rs`
# to verify that a cloned `ResidualStream` read from two threads is
# observationally equivalent to a single-threaded read. The crate itself
# is single-threaded; loom documents the absence of shared-state hazards.
loom = "0.7"
# Compile-fail harness for the read-only connection surface test
# (`tests/live_readonly_conn_surface.rs`) — asserts that calling
# `execute`, `prepare`, or `transaction` on a `ReadOnlyPgConn` fails
# to compile. The data-diode guarantee is type-level, so the test
# that pins it must be a build-time assertion.
trybuild = "1"
# Pass-2 M5: Criterion microbenchmarks for the motif engine, the
# baseline detectors, and the live distiller. Used by the three
# bench targets under `benches/`. Each of those targets sets
# `harness = false` so that Criterion's own `main()` runs in place of
# the default bench harness.
criterion = "0.5"

# Library target. The crate name uses an underscore because the
# package name `dsfb-database` contains a hyphen, which is not valid
# in a Rust crate identifier.
[lib]
name = "dsfb_database"
path = "src/lib.rs"

[[bin]]
name = "dsfb-database"
path = "src/main.rs"
# Main binary emits figures (plotters) and JSON sidecars (serde_json),
# so it needs both `cli` and `report`.
# NOTE(review): `default = ["cli"]` does not enable `report`, so under
# default features Cargo skips this target even though `default-run`
# points at it; build or install with `--features report` (or `full`).
required-features = ["cli", "report"]

# Phase-A1: multi-seed variance sweep. Runs the controlled TPC-DS
# perturbation pipeline across seeds 1..=N and reports mean/stddev/min/
# max for every per-motif metric. Produces artefacts outside the pinned
# fingerprint path (out/variance.csv); the single-seed fingerprint lock
# is untouched.
[[bin]]
name = "variance_sweep"
path = "src/bin/variance_sweep.rs"
# Only `cli` is required: the artefact listed above is a CSV, not a
# PNG/JSON output gated behind `report`.
required-features = ["cli"]

# Phase-A2: precision/recall/F1 sweep over the (drift, slew)
# thresholds. Emits one CSV + one PNG per motif, plus the baseline
# operating point marked on each figure. Produces artefacts outside the
# pinned fingerprint path (out/pr.<motif>.csv, out/pr.<motif>.png).
[[bin]]
name = "pr_sweep"
path = "src/bin/pr_sweep.rs"
# PR sweep emits PNG figures — requires the `report` feature (which
# enables the optional `plotters` dependency) in addition to `cli`.
required-features = ["cli", "report"]

# Phase-A3: false-alarm calibration on a Gaussian null trace. Runs the
# motif grammar on a pure-noise residual stream across a seed range and
# reports per-motif mean false-alarms-per-hour with a 95% CI. Produces
# artefacts outside the pinned fingerprint path (out/null.csv).
[[bin]]
name = "null_trace"
path = "src/bin/null_trace.rs"
# Only `cli` is required: the artefact listed above is a CSV, not a
# PNG/JSON output gated behind `report`.
required-features = ["cli"]

# Phase-A4: bake-off vs. published change-point baselines (ADWIN,
# BOCPD, PELT) on the same TPC-DS perturbation stream. Emits one CSV per
# motif comparing dsfb-database against each baseline under identical
# scoring. Produces artefacts outside the pinned fingerprint path
# (out/bakeoff.<motif>.csv).
[[bin]]
name = "baseline_bake_off"
path = "src/bin/baseline_bake_off.rs"
# Only `cli` is required: the artefacts listed above are CSVs, not
# PNG/JSON outputs gated behind `report`.
required-features = ["cli"]

# Phase-B1: inject one parametric perturbation onto each adapter's
# real-shaped exemplar (Snowset, SQLShare, CEB, JOB) and measure
# per-(carrier, motif, scale) detection latency and onset-localization
# error against the injected ground truth. Produces artefacts outside
# the pinned fingerprint path (out/inject_over_real.csv).
[[bin]]
name = "inject_over_real"
path = "src/bin/inject_over_real.rs"
# Only `cli` is required: the artefact listed above is a CSV, not a
# PNG/JSON output gated behind `report`.
required-features = ["cli"]

# Phase-B5: cost / overhead benchmark. Replicates the seed-42 TPC-DS
# perturbation stream up to the target residual count, times
# MotifEngine::run, and reports throughput, per-step mean latency,
# and peak resident set size. Produces artefacts outside the pinned
# fingerprint path (out/cost.csv).
[[bin]]
name = "ingest_throughput"
path = "src/bin/ingest_throughput.rs"
# Only `cli` is required: the artefact listed above is a CSV, not a
# PNG/JSON output gated behind `report`.
required-features = ["cli"]

# Phase-B4: one-at-a-time motif-parameter ablation. Sweeps each of
# the five MotifParams knobs independently per motif and reports per-
# (motif, parameter, probe) precision / recall / F1. Produces
# artefacts outside the pinned fingerprint path
# (out/ablation.<motif>.csv).
[[bin]]
name = "ablation_sweep"
path = "src/bin/ablation_sweep.rs"
# Only `cli` is required: the artefacts listed above are CSVs, not
# PNG/JSON outputs gated behind `report`.
required-features = ["cli"]

# Phase-B2: TPC-C generalization replay through the unchanged
# pg_stat_statements adapter. Synthesises a TPC-C-shaped snapshot CSV
# (a workload shape the envelope was never tuned against), plants two
# ground-truth perturbations (plan regression + workload-phase
# concentration), and reports per-motif episode counts plus
# localization against the planted windows. Produces artefacts outside
# the pinned fingerprint path (out/tpc_c_generalization.csv).
[[bin]]
name = "tpc_c_generalization"
path = "src/bin/tpc_c_generalization.rs"
# Only `cli` is required: the artefact listed above is a CSV, not a
# PNG/JSON output gated behind `report`.
required-features = ["cli"]

# Paper §Live figure regeneration. Deterministically synthesises a
# pg_stat_statements snapshot trajectory with a planted plan
# regression, drives it through the live DistillerState (the exact
# function called on every poll in live mode), and renders the
# three-panel pulsed-scrape figure. Writes the fixture CSVs to
# paper/fixtures/live_pg/ and the PNG to paper/figs/live_pulsed_scrape.png.
[[bin]]
name = "live_pulsed_scrape_figure"
path = "src/bin/live_pulsed_scrape_figure.rs"
# `live-postgres` already implies `report` in `[features]`; naming
# both here is redundant but harmless.
required-features = ["cli", "report", "live-postgres"]

# Paper §Live Evaluation: replay a SHA-256-pinned residual tape through
# DSFB + ADWIN / BOCPD / PELT, score each against a ground-truth
# windows JSON, and emit a single apples-to-apples bakeoff CSV. The
# live-adapter analogue of the offline `baseline_bake_off` binary —
# same scoring (`metrics::evaluate`), same detectors (`baselines::*`),
# different input (a live-captured tape instead of the synthetic
# perturbation stream).
[[bin]]
name = "replay_tape_baselines"
path = "src/bin/replay_tape_baselines.rs"
# `live-postgres` already implies `report` in `[features]`; naming
# both here is redundant but harmless.
# NOTE(review): presumably needed for the tape types and the
# ground-truth JSON parsing — confirm.
required-features = ["cli", "report", "live-postgres"]

# Paper §Live Evaluation figure renderer. Consumes the two pinned
# tapes under paper/fixtures/live_pg_real/ plus the ground-truth JSON
# and produces two PNGs:
#   * live_real_pg_trajectory.png — three-panel real-engine figure.
#   * live_determinism_overlay.png — two-panel engine→tape /
#     tape→episodes asymmetry figure.
# Output is a byte-deterministic function of the pinned fixtures.
[[bin]]
name = "render_live_eval_figures"
path = "src/bin/render_live_eval_figures.rs"
# `live-postgres` already implies `report` in `[features]`; naming
# both here is redundant but harmless.
required-features = ["cli", "report", "live-postgres"]

# Paper §Live-Eval baseline-tuning sweep. Picks the best macro-F1
# hyperparameter config for each of ADWIN/BOCPD/PELT on a held-out
# training replication (default rep01 of every fault), freezes the
# config, and evaluates it on the remaining replications. DSFB is
# evaluated at defaults (not re-tuned) so the comparison is
# "baselines at best training-split config" vs. "DSFB as published".
# See paper/tables/baseline_tuned.tex + §Held-Out Baseline Tuning.
[[bin]]
name = "baseline_tune"
path = "src/bin/baseline_tune.rs"
# `live-postgres` already implies `report` in `[features]`; naming
# both here is redundant but harmless.
required-features = ["cli", "report", "live-postgres"]

# Paper §Public-Trace Bake-Off. Evaluates all four detectors on the
# publicly-cited offline traces (Snowset, SQLShare, CEB, JOB) using
# the same scoring pipeline as the live tape bake-off. Because those
# traces are not fault-annotated, every emitted episode counts as an
# FP; we report per-detector FAR/hr with 95 % bootstrap CI as a
# workload-stress *upper bound* on false-alarm rate, not a
# detection-quality claim. See paper/tables/public_trace_far.tex.
[[bin]]
name = "public_trace_bakeoff"
path = "src/bin/public_trace_bakeoff.rs"
# NOTE(review): the description above does not say which `report`
# output (PNG or JSON) this binary emits — confirm that `report` is
# actually needed here.
required-features = ["cli", "report"]

# Pass-2 N4: Monte-Carlo coverage of the percentile-bootstrap 95 % CI
# at small sample sizes. The §Live-Eval table reports CIs at n=10 and
# the Pass-2 statistics reviewer asked for an empirical coverage curve
# rather than just the literature caveat. Output is a CSV that the
# paper's bootstrap-coverage figure renders directly. Pure synthetic
# Monte Carlo — no engine touched, no fixture mutated.
[[bin]]
name = "bootstrap_coverage"
path = "src/bin/bootstrap_coverage.rs"
# Only `cli` is required: the output described above is a CSV, not a
# PNG/JSON output gated behind `report`.
required-features = ["cli"]

# Pass-2 M5: Criterion microbenchmarks. Each [[bench]] entry is a
# standalone target with `harness = false` so Criterion's main()
# replaces the default test harness. They live under benches/ and
# are run by `cargo bench`.
[[bench]]
name = "motif_engine"
path = "benches/motif_engine.rs"
harness = false
# NOTE(review): the manifest does not show why a benchmark needs the
# clap-only `cli` feature — confirm the bench uses `cli`-gated items.
required-features = ["cli"]

# Criterion benchmark target; `harness = false` lets Criterion supply
# its own `main()`.
[[bench]]
name = "baselines"
path = "benches/baselines.rs"
harness = false
# NOTE(review): the manifest does not show why a benchmark needs the
# clap-only `cli` feature — confirm the bench uses `cli`-gated items.
required-features = ["cli"]

# Criterion benchmark target; `harness = false` lets Criterion supply
# its own `main()`.
[[bench]]
name = "live_distiller"
path = "benches/live_distiller.rs"
harness = false
# Presumably needs `live-postgres` because the live distiller it
# measures is compiled only under that feature — confirm.
required-features = ["cli", "live-postgres"]

# Release-profile overrides.
[profile.release]
# Maximum optimisation level; this is also Cargo's default for the
# release profile, so the line only makes the choice explicit.
opt-level = 3
# Thin LTO: cross-crate link-time optimisation at a lower build-time
# cost than full ("fat") LTO.
lto = "thin"