Skip to main content

samkhya_postgres/
lib.rs

1//! samkhya-postgres — PostgreSQL adapter for samkhya.
2//!
3//! # Build modes
4//!
5//! - **Default** (no features): empty `rlib`. Compiles without
6//!   PostgreSQL development headers; suitable for `cargo check
7//!   --workspace` in CI environments that do not have `libpq-dev` /
8//!   `postgresql-server-dev-*` installed.
9//! - **`pg_extension`** feature **plus** `samkhya_pgrx_enabled` rustc
10//!   cfg flag: pulls in [pgrx] and exposes the functions defined
11//!   below as a loadable PostgreSQL extension targeting **PostgreSQL
12//!   17** (the only major supported in v1.0). Build with `cargo pgrx`
13//!   — see this crate's README. The recommended invocation is:
14//!
15//!   ```bash
16//!   RUSTFLAGS="--cfg=samkhya_pgrx_enabled" \
17//!     cargo pgrx run pg17 --features pg_extension --package samkhya-postgres
18//!   ```
19//!
20//! # v1.0 double-gating + single-version pin (pg17)
21//!
22//! pgrx 0.12.9's `pgrx-pg-sys` build script panics at bindgen-time
23//! when more than one `pg$VERSION` feature is simultaneously active:
24//!
25//! ```text
26//! Error: Multiple `pg$VERSION` features found.
27//! `--no-default-features` may be required.
28//! Found: pg13, pg14, pg15, pg16, pg17
29//! ```
30//!
31//! Cargo's `--all-features` (used by `cargo check --workspace
32//! --all-features` and similar workspace-wide gates) activates every
33//! feature in a crate's `[features]` table simultaneously. With the
34//! canonical pgrx feature-flag pattern (pg13..pg17 as parallel
35//! features that each forward `pgrx/pgNN`), workspace gates therefore
36//! cannot pass — pgrx-pg-sys's build script panics before any
37//! crate-level `compile_error!` we add ever fires.
38//!
39//! For v1.0 the design is:
40//!
41//! 1. **Single-version pin (pg17)**. The `pg_extension` Cargo feature
42//!    forwards `pgrx/pg17`. No pg13..pg16 features are declared.
43//! 2. **Target-cfg dep isolation**. The pgrx dependency lives under
44//!    `[target.'cfg(samkhya_pgrx_enabled)'.dependencies]` in
45//!    `Cargo.toml`. Under `cargo check --workspace --all-features`
46//!    (where `samkhya_pgrx_enabled` is unset), pgrx is excluded from
47//!    the dep graph and `pg_extension` is a harmless no-op. Under
48//!    `RUSTFLAGS="--cfg=samkhya_pgrx_enabled" cargo pgrx run pg17
49//!    --features pg_extension`, pgrx enters the dep graph and the
50//!    extension module below compiles.
51//!
52//! v1.1 will restore pg13..pg16 when one of:
53//!
54//! - pgrx 0.13+ removes the feature-multiplexing constraint, or
55//! - the pgrx-using code is moved to a non-workspace sub-crate that
56//!   does not participate in `--workspace --all-features` gates.
57//!
58//! See `feedback-pgrx-feature-isolation` memory for the full
59//! design-decision record and retire conditions.
60//!
61//! # Provided SQL functions (when built as an extension)
62//!
63//! - `samkhya_hll_count(input anyarray) -> bigint` — build a samkhya
64//!   `HllSketch` from the input array and return its estimated
65//!   distinct-element count. Useful as a quick sanity check that the
66//!   in-engine sketch agrees with the portable sketch produced by
67//!   samkhya-core.
68//! - `samkhya_puffin_inspect(path text) -> jsonb` — open a Puffin
69//!   sidecar file on the server filesystem and return per-blob
70//!   metadata (`kind`, `offset`, `length`, `fields`,
71//!   `compression_codec`).
72//!
73//! # Scope
74//!
75//! This is the v1.0 scaffold. A v1.1 target is the operator-side
76//! cardinality hook (replacing `get_relation_info_hook` and friends)
77//! so the planner picks up samkhya's portable, feedback-driven,
78//! self-correcting row estimates without per-query SQL changes. The
79//! `get_relation_info_hook` integration is intentionally deferred
80//! because it requires deeper pgrx hook plumbing than belongs in a
81//! scaffold.
82//!
83//! [pgrx]: https://github.com/pgcentralfoundation/pgrx
84
85#![cfg_attr(
86    not(all(feature = "pg_extension", samkhya_pgrx_enabled)),
87    deny(rust_2018_idioms)
88)]
89
90// ---------------------------------------------------------------------
91// Non-extension build: empty rlib.
92//
93// The stub compiles whenever the `pg_extension` feature is OFF, OR
94// when the `samkhya_pgrx_enabled` cfg flag is unset. The latter
95// catches `cargo check --workspace --all-features` (which enables
96// `pg_extension` but does not set the cfg flag), keeping the
97// workspace-wide gate green on hosts without PG dev headers.
98// ---------------------------------------------------------------------
99
100#[cfg(not(all(feature = "pg_extension", samkhya_pgrx_enabled)))]
101mod stub {
102    //! Stub surface that compiles without pgrx.
103    //!
104    //! The real extension entry points only exist when the
105    //! `pg_extension` feature is enabled. We keep one trivially
106    //! callable stub here so `cargo check` exercises something other
107    //! than an empty crate root, and so downstream tooling that lists
108    //! crate items has at least one symbol to point at.
109
110    /// Returns the samkhya-postgres crate version string.
111    pub fn version() -> &'static str {
112        env!("CARGO_PKG_VERSION")
113    }
114
115    #[cfg(test)]
116    mod tests {
117        use super::*;
118
119        #[test]
120        fn version_is_non_empty() {
121            assert!(!version().is_empty());
122        }
123    }
124}
125
126#[cfg(not(all(feature = "pg_extension", samkhya_pgrx_enabled)))]
127pub use stub::version;
128
129// ---------------------------------------------------------------------
130// pgrx-backed extension build.
131//
132// Activated only when BOTH:
133//   - the `pg_extension` Cargo feature is enabled, AND
134//   - the `samkhya_pgrx_enabled` rustc cfg flag is set
135//     (typically via `RUSTFLAGS="--cfg=samkhya_pgrx_enabled"`).
136// The double-gate ensures workspace-wide `--all-features` builds do
137// not pull pgrx into the dep graph on hosts without PG dev headers.
138// ---------------------------------------------------------------------
139
#[cfg(all(feature = "pg_extension", samkhya_pgrx_enabled))]
mod extension {
    //! pgrx-backed SQL surface: `samkhya_hll_count` and
    //! `samkhya_puffin_inspect`, plus their `#[pg_test]` coverage.

    use pgrx::prelude::*;
    use pgrx::{AnyElement, Array, JsonB};
    use samkhya_core::puffin::PuffinReader;
    use samkhya_core::sketches::HllSketch;
    use serde_json::{Map, Value, json};
    use std::fs::File;
    use std::io::BufReader;

    pgrx::pg_module_magic!();

    /// Build a samkhya HLL sketch from the input array and return its
    /// estimated distinct-element count.
    ///
    /// `NULL` elements are skipped. The sketch precision is fixed at
    /// 14 (≈16 KiB of registers, ≈0.81% relative standard error),
    /// matching the default used elsewhere in samkhya.
    ///
    /// # Errors
    ///
    /// Raises a PostgreSQL `ERROR` if the sketch cannot be constructed.
    ///
    /// # Caveats
    ///
    /// Elements are identified by the native-endian bytes of their
    /// `Datum` word, not by a decoded value. NOTE(review): a `Datum`
    /// only carries the value itself for pass-by-value types (`int4`,
    /// `int8`, `bool`, ...). For pass-by-reference types (`text`,
    /// `numeric`, `uuid`, ...) it is a pointer, so equal values stored
    /// at different addresses would presumably be counted as distinct.
    /// Confirm before using this on non-by-value element types.
    #[pg_extern(immutable, parallel_safe)]
    fn samkhya_hll_count(input: Array<'_, AnyElement>) -> i64 {
        const PRECISION: u8 = 14;

        let mut hll = match HllSketch::new(PRECISION) {
            Ok(h) => h,
            Err(e) => error!("samkhya_hll_count: failed to build HLL sketch: {e}"),
        };

        // `flatten()` drops the `None` entries, i.e. SQL NULLs.
        for elem in input.iter().flatten() {
            // Hash the raw Datum word. Two elements collapse into one
            // sketch entry iff their Datum words are bitwise equal.
            // That matches value equality for pass-by-value types
            // only; see the caveat in the function docs for
            // pass-by-reference types.
            let datum = elem.into_datum();
            let bytes = datum.to_ne_bytes();
            hll.add(&bytes);
        }

        hll.estimate() as i64
    }

    /// Open a Puffin sidecar file at `path` on the server filesystem
    /// and return per-blob metadata as JSONB.
    ///
    /// The returned object has shape:
    /// ```json
    /// {
    ///   "blobs": [
    ///     {
    ///       "kind": "samkhya.hll-v1",
    ///       "fields": [7],
    ///       "offset": 4,
    ///       "length": 16384,
    ///       "compression_codec": null
    ///     }
    ///   ]
    /// }
    /// ```
    ///
    /// # Volatility
    ///
    /// Declared `volatile`: the result depends on the current contents
    /// of a file outside the database, which can change between (or
    /// during) statements, so the planner must not cache or fold calls.
    ///
    /// # Errors
    ///
    /// Raises a PostgreSQL `ERROR` if the file cannot be opened or is
    /// not a parseable Puffin file.
    ///
    /// # Security
    ///
    /// NOTE(review): `path` is opened as given, with the privileges of
    /// the server process, and the error text distinguishes "cannot
    /// open" from "cannot parse". Consider revoking `EXECUTE` from
    /// `PUBLIC` at install time if untrusted roles can reach this
    /// function.
    #[pg_extern(volatile, parallel_safe)]
    fn samkhya_puffin_inspect(path: &str) -> JsonB {
        let file = match File::open(path) {
            Ok(f) => f,
            Err(e) => error!("samkhya_puffin_inspect: open {path}: {e}"),
        };
        let reader = match PuffinReader::open(BufReader::new(file)) {
            Ok(r) => r,
            Err(e) => error!("samkhya_puffin_inspect: parse {path}: {e}"),
        };

        // One JSON object per blob descriptor, in reader order.
        let blobs: Vec<Value> = reader
            .blobs()
            .iter()
            .map(|b| {
                let mut entry = Map::new();
                entry.insert("kind".into(), Value::String(b.kind.clone()));
                entry.insert(
                    "fields".into(),
                    Value::Array(b.fields.iter().map(|f| json!(*f)).collect()),
                );
                entry.insert("offset".into(), json!(b.offset));
                entry.insert("length".into(), json!(b.length));
                entry.insert(
                    "compression_codec".into(),
                    match &b.compression_codec {
                        Some(c) => Value::String(c.clone()),
                        None => Value::Null,
                    },
                );
                Value::Object(entry)
            })
            .collect();

        JsonB(json!({ "blobs": blobs }))
    }

    // -----------------------------------------------------------------
    // pg_test plumbing — exercised by `cargo pgrx test`.
    // -----------------------------------------------------------------

    #[cfg(any(test, feature = "pg_test"))]
    #[pg_schema]
    mod tests {
        use pgrx::prelude::*;

        #[pg_test]
        fn hll_count_on_small_array_is_plausible() {
            let n: Option<i64> = Spi::get_one(
                "SELECT samkhya_hll_count(ARRAY[1, 2, 3, 4, 5, 5, 5]::int[]::anyarray)",
            )
            .expect("Spi::get_one");
            let n = n.expect("non-null result");
            // Five distinct ints; HLL at p=14 should land close.
            assert!((1..=10).contains(&n), "estimate {n} not near 5");
        }
    }
}

/// pgrx test framework entry point.
///
/// `#[pg_test]` expansions and `cargo pgrx test` refer to this module
/// as `crate::pg_test`, so it has to live at the crate root rather
/// than inside the private `extension` module. It is gated like the
/// extension itself so the default (stub) build never sees it.
#[cfg(all(test, feature = "pg_extension", samkhya_pgrx_enabled))]
pub mod pg_test {
    /// One-time setup hook run before the test suite; nothing to do.
    pub fn setup(_options: Vec<&str>) {}

    /// Extra `postgresql.conf` settings for the test cluster; none needed.
    pub fn postgresql_conf_options() -> Vec<&'static str> {
        vec![]
    }
}