samkhya_postgres/lib.rs
1//! samkhya-postgres — PostgreSQL adapter for samkhya.
2//!
3//! # Build modes
4//!
5//! - **Default** (no features): empty `rlib`. Compiles without
6//! PostgreSQL development headers; suitable for `cargo check
7//! --workspace` in CI environments that do not have `libpq-dev` /
8//! `postgresql-server-dev-*` installed.
9//! - **`pg_extension`** feature **plus** `samkhya_pgrx_enabled` rustc
10//! cfg flag: pulls in [pgrx] and exposes the functions defined
11//! below as a loadable PostgreSQL extension targeting **PostgreSQL
12//! 17** (the only major supported in v1.0). Build with `cargo pgrx`
13//! — see this crate's README. The recommended invocation is:
14//!
15//! ```bash
16//! RUSTFLAGS="--cfg=samkhya_pgrx_enabled" \
17//! cargo pgrx run pg17 --features pg_extension --package samkhya-postgres
18//! ```
19//!
20//! # v1.0 double-gating + single-version pin (pg17)
21//!
22//! pgrx 0.12.9's `pgrx-pg-sys` build script panics at bindgen-time
23//! when more than one `pg$VERSION` feature is simultaneously active:
24//!
25//! ```text
26//! Error: Multiple `pg$VERSION` features found.
27//! `--no-default-features` may be required.
28//! Found: pg13, pg14, pg15, pg16, pg17
29//! ```
30//!
31//! Cargo's `--all-features` (used by `cargo check --workspace
32//! --all-features` and similar workspace-wide gates) activates every
33//! feature in a crate's `[features]` table simultaneously. With the
34//! canonical pgrx feature-flag pattern (pg13..pg17 as parallel
35//! features that each forward `pgrx/pgNN`), workspace gates therefore
36//! cannot pass — pgrx-pg-sys's build script panics before any
37//! crate-level `compile_error!` guard we add ever fires.
38//!
39//! For v1.0 the design is:
40//!
41//! 1. **Single-version pin (pg17)**. The `pg_extension` Cargo feature
42//! forwards `pgrx/pg17`. No pg13..pg16 features are declared.
43//! 2. **Target-cfg dep isolation**. The pgrx dependency lives under
44//! `[target.'cfg(samkhya_pgrx_enabled)'.dependencies]` in
45//! `Cargo.toml`. Under `cargo check --workspace --all-features`
46//! (where `samkhya_pgrx_enabled` is unset), pgrx is excluded from
47//! the dep graph and `pg_extension` is a harmless no-op. Under
48//! `RUSTFLAGS="--cfg=samkhya_pgrx_enabled" cargo pgrx run pg17
49//! --features pg_extension`, pgrx enters the dep graph and the
50//! extension module below compiles.
51//!
52//! v1.1 will restore pg13..pg16 when one of:
53//!
54//! - pgrx 0.13+ removes the feature-multiplexing constraint, or
55//! - the pgrx-using code is moved to a non-workspace sub-crate that
56//! does not participate in `--workspace --all-features` gates.
57//!
58//! See `feedback-pgrx-feature-isolation` memory for the full
59//! design-decision record and retire conditions.
60//!
61//! # Provided SQL functions (when built as an extension)
62//!
63//! - `samkhya_hll_count(input anyarray) -> bigint` — build a samkhya
64//! `HllSketch` from the input array and return its estimated
65//! distinct-element count. Useful as a quick sanity check that the
66//! in-engine sketch agrees with the portable sketch produced by
67//! samkhya-core.
68//! - `samkhya_puffin_inspect(path text) -> jsonb` — open a Puffin
69//! sidecar file on the server filesystem and return per-blob
70//! metadata (`kind`, `offset`, `length`, `fields`,
71//! `compression_codec`).
72//!
73//! # Scope
74//!
75//! This is the v1.0 scaffold. A v1.1 target is the operator-side
76//! cardinality hook (replacing `get_relation_info_hook` and friends)
77//! so the planner picks up samkhya's portable, feedback-driven,
78//! self-correcting row estimates without per-query SQL changes. The
79//! `get_relation_info_hook` integration is intentionally deferred
80//! because it requires deeper pgrx hook plumbing than belongs in a
81//! scaffold.
82//!
83//! [pgrx]: https://github.com/pgcentralfoundation/pgrx
84
85#![cfg_attr(
86 not(all(feature = "pg_extension", samkhya_pgrx_enabled)),
87 deny(rust_2018_idioms)
88)]
89
90// ---------------------------------------------------------------------
91// Non-extension build: empty rlib.
92//
93// The stub compiles whenever the `pg_extension` feature is OFF, OR
94// when the `samkhya_pgrx_enabled` cfg flag is unset. The latter
95// catches `cargo check --workspace --all-features` (which enables
96// `pg_extension` but does not set the cfg flag), keeping the
97// workspace-wide gate green on hosts without PG dev headers.
98// ---------------------------------------------------------------------
99
100#[cfg(not(all(feature = "pg_extension", samkhya_pgrx_enabled)))]
101mod stub {
102 //! Stub surface that compiles without pgrx.
103 //!
104 //! The real extension entry points only exist when the
105 //! `pg_extension` feature is enabled. We keep one trivially
106 //! callable stub here so `cargo check` exercises something other
107 //! than an empty crate root, and so downstream tooling that lists
108 //! crate items has at least one symbol to point at.
109
110 /// Returns the samkhya-postgres crate version string.
111 pub fn version() -> &'static str {
112 env!("CARGO_PKG_VERSION")
113 }
114
115 #[cfg(test)]
116 mod tests {
117 use super::*;
118
119 #[test]
120 fn version_is_non_empty() {
121 assert!(!version().is_empty());
122 }
123 }
124}
125
126#[cfg(not(all(feature = "pg_extension", samkhya_pgrx_enabled)))]
127pub use stub::version;
128
129// ---------------------------------------------------------------------
130// pgrx-backed extension build.
131//
132// Activated only when BOTH:
133// - the `pg_extension` Cargo feature is enabled, AND
134// - the `samkhya_pgrx_enabled` rustc cfg flag is set
135// (typically via `RUSTFLAGS="--cfg=samkhya_pgrx_enabled"`).
136// The double-gate ensures workspace-wide `--all-features` builds do
137// not pull pgrx into the dep graph on hosts without PG dev headers.
138// ---------------------------------------------------------------------
139
140#[cfg(all(feature = "pg_extension", samkhya_pgrx_enabled))]
141mod extension {
142 use pgrx::prelude::*;
143 use pgrx::{AnyElement, Array, JsonB};
144 use samkhya_core::puffin::PuffinReader;
145 use samkhya_core::sketches::HllSketch;
146 use serde_json::{Map, Value, json};
147 use std::fs::File;
148 use std::io::BufReader;
149
150 pgrx::pg_module_magic!();
151
152 /// Build a samkhya HLL sketch from the input array and return its
153 /// estimated distinct-element count.
154 ///
155 /// `NULL` elements are skipped. The sketch precision is fixed at
156 /// 14 (≈16 KiB of registers, ≈0.81% relative standard error),
157 /// matching the default used elsewhere in samkhya.
158 #[pg_extern(immutable, parallel_safe)]
159 fn samkhya_hll_count(input: Array<'_, AnyElement>) -> i64 {
160 const PRECISION: u8 = 14;
161
162 let mut hll = match HllSketch::new(PRECISION) {
163 Ok(h) => h,
164 Err(e) => error!("samkhya_hll_count: failed to build HLL sketch: {e}"),
165 };
166
167 for elem in input.iter().flatten() {
168 // Hash the raw Datum bytes. This treats two values as
169 // equal iff their on-disk representation is bitwise equal,
170 // which is correct for fixed-width Postgres types and for
171 // canonicalized varlena types. For non-canonical varlena
172 // inputs the caller should pre-canonicalize.
173 let datum = elem.into_datum();
174 let bytes = datum.to_ne_bytes();
175 hll.add(&bytes);
176 }
177
178 hll.estimate() as i64
179 }
180
181 /// Open a Puffin sidecar file at `path` on the server filesystem
182 /// and return per-blob metadata as JSONB.
183 ///
184 /// The returned object has shape:
185 /// ```json
186 /// {
187 /// "blobs": [
188 /// {
189 /// "kind": "samkhya.hll-v1",
190 /// "fields": [7],
191 /// "offset": 4,
192 /// "length": 16384,
193 /// "compression_codec": null
194 /// }
195 /// ]
196 /// }
197 /// ```
198 #[pg_extern(stable, parallel_safe)]
199 fn samkhya_puffin_inspect(path: &str) -> JsonB {
200 let file = match File::open(path) {
201 Ok(f) => f,
202 Err(e) => error!("samkhya_puffin_inspect: open {path}: {e}"),
203 };
204 let reader = match PuffinReader::open(BufReader::new(file)) {
205 Ok(r) => r,
206 Err(e) => error!("samkhya_puffin_inspect: parse {path}: {e}"),
207 };
208
209 let blobs: Vec<Value> = reader
210 .blobs()
211 .iter()
212 .map(|b| {
213 let mut entry = Map::new();
214 entry.insert("kind".into(), Value::String(b.kind.clone()));
215 entry.insert(
216 "fields".into(),
217 Value::Array(b.fields.iter().map(|f| json!(*f)).collect()),
218 );
219 entry.insert("offset".into(), json!(b.offset));
220 entry.insert("length".into(), json!(b.length));
221 entry.insert(
222 "compression_codec".into(),
223 match &b.compression_codec {
224 Some(c) => Value::String(c.clone()),
225 None => Value::Null,
226 },
227 );
228 Value::Object(entry)
229 })
230 .collect();
231
232 JsonB(json!({ "blobs": blobs }))
233 }
234
235 // -----------------------------------------------------------------
236 // pg_test plumbing — exercised by `cargo pgrx test`.
237 // -----------------------------------------------------------------
238
#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use pgrx::prelude::*;

    /// Runs `samkhya_hll_count` through SPI on a seven-element int
    /// array holding five distinct values and checks that the estimate
    /// falls in the loose range 1..=10.
    #[pg_test]
    fn hll_count_on_small_array_is_plausible() {
        const QUERY: &str =
            "SELECT samkhya_hll_count(ARRAY[1, 2, 3, 4, 5, 5, 5]::int[]::anyarray)";

        let estimate = Spi::get_one::<i64>(QUERY)
            .expect("Spi::get_one")
            .expect("non-null result");

        // The bounds are deliberately wide: the sketch is approximate.
        assert!(
            (1..=10).contains(&estimate),
            "estimate {n} not near 5",
            n = estimate
        );
    }
}
255
/// Hooks consulted by the pgrx test framework.
///
/// NOTE(review): pgrx's `#[pg_test]` expansion appears to look these
/// up as `crate::pg_test::...`; confirm this module is reachable from
/// the crate root, since it is declared inside `extension`.
#[cfg(test)]
pub mod pg_test {
    /// One-time initialisation before the tests run; nothing to do,
    /// so the supplied options are ignored.
    pub fn setup(_options: Vec<&str>) {}

    /// Extra `postgresql.conf` settings required by the tests: none.
    pub fn postgresql_conf_options() -> Vec<&'static str> {
        Vec::new()
    }
}
265}