heeranjid 0.3.0

Distributed ID generation — HeerId (64-bit) and RanjId (128-bit UUIDv8) with configurable precision
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
//! Schema installation and seed helpers for the `tokio-postgres` stack.
//!
//! # What
//!
//! Exposes the HeeRanjID DDL and seed SQL as `pub const` blobs and offers
//! convenience async helpers that run them through a
//! [`tokio_postgres::GenericClient`]. Intended for test harnesses and
//! application bootstrap paths that want to install HeeRanjID's schema
//! without depending on the `heeranjid-sqlx` crate.
//!
//! # Why here
//!
//! The `postgres_codec` module covers per-row type coercion; this module
//! covers database-wide bootstrap. Both are gated on the `postgres`
//! feature and require `tokio-postgres` at runtime.

use tokio_postgres::GenericClient;

/// Error returned by the per-table autofill trigger helpers (Task 11 of
/// the v0.3.0 descending-sort rollout).
///
/// The Task-10 install helpers (`install_flip_functions`,
/// `install_desc_generators`, `install_migration_support`,
/// `install_all_desc_support`) deliberately stay on
/// `tokio_postgres::Error` — they don't do any client-side validation.
/// The trigger helpers, by contrast, *must* reject malformed identifiers
/// before interpolating them into SQL, which doesn't fit cleanly into
/// the `tokio_postgres::Error` shape. Hence this local enum.
#[derive(Debug)]
pub enum SchemaError {
    /// Underlying Postgres client error.
    TokioPostgres(tokio_postgres::Error),
    /// Caller passed a table/column name that failed `validate_ident`:
    /// empty, longer than 63 chars, or contains non-[A-Za-z0-9_] bytes
    /// (including the SQL-injection shapes `;`, `"`, `'`, whitespace,
    /// `--`). The offending value is carried for diagnostics only —
    /// callers should not echo it to untrusted clients.
    InvalidIdentifier(String),
}

impl std::fmt::Display for SchemaError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            SchemaError::TokioPostgres(e) => write!(f, "tokio-postgres error: {e}"),
            SchemaError::InvalidIdentifier(s) => {
                write!(f, "invalid Postgres identifier: {s:?}")
            }
        }
    }
}

impl std::error::Error for SchemaError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            SchemaError::TokioPostgres(e) => Some(e),
            SchemaError::InvalidIdentifier(_) => None,
        }
    }
}

impl From<tokio_postgres::Error> for SchemaError {
    fn from(e: tokio_postgres::Error) -> Self {
        SchemaError::TokioPostgres(e)
    }
}

/// Core `heer` schema DDL — tables, domains, and base types.
pub const SCHEMA_SQL: &str = include_str!("../sql/schema.sql");

/// Session-local node-id helpers.
pub const SESSION_SQL: &str = include_str!("../sql/functions/session.sql");

/// `generate_id()` / HeerId generation function.
pub const GENERATE_HEERID_SQL: &str = include_str!("../sql/functions/generate_heerid.sql");

/// `generate_ranj_id()` / RanjId generation function.
pub const GENERATE_RANJID_SQL: &str = include_str!("../sql/functions/generate_ranjid.sql");

/// Complete install blob — schema + all function definitions, in
/// dependency order. Equivalent to executing `SCHEMA_SQL`,
/// `SESSION_SQL`, `GENERATE_HEERID_SQL`, and `GENERATE_RANJID_SQL` in
/// sequence.
pub const INSTALL_SQL: &str = concat!(
    include_str!("../sql/schema.sql"),
    "\n",
    include_str!("../sql/functions/session.sql"),
    "\n",
    include_str!("../sql/functions/generate_heerid.sql"),
    "\n",
    include_str!("../sql/functions/generate_ranjid.sql"),
);

/// Seed SQL — inserts the default node row (node_id = 1).
pub const SEED_SQL: &str = include_str!("../sql/seed.sql");

// --- flip/generator/backfill install helpers for v0.3.0 ---

/// Flip primitives: `heerid_flip_mask`, `heerid_to_desc`/`heerid_to_asc`,
/// `ranjid_to_desc`/`ranjid_to_asc`. (§5.1)
pub const DESC_FLIP_SQL: &str = include_str!("../sql/functions/desc_flip.sql");

/// Single-row generators + desc generators: `heerid_next`, `ranjid_next`,
/// `heerid_next_desc`, `ranjid_next_desc`. (§5.1)
pub const DESC_GENERATORS_SQL: &str = include_str!("../sql/functions/desc_generators.sql");

/// Migration-support procedure: `heeranjid_bulk_backfill`. (§5.1)
pub const BULK_BACKFILL_SQL: &str = include_str!("../sql/functions/bulk_backfill.sql");

/// Install the HeeRanjID schema + functions on the target database.
///
/// Runs [`INSTALL_SQL`] via `client.batch_execute`. Idempotent in the
/// sense that all DDL uses `CREATE OR REPLACE` / `CREATE ... IF NOT
/// EXISTS`, so re-running against an already-installed database is a
/// no-op.
pub async fn install_schema<C>(client: &C) -> Result<(), tokio_postgres::Error>
where
    C: GenericClient + ?Sized,
{
    client.batch_execute(INSTALL_SQL).await
}

/// Seed the default node row (node_id = 1).
///
/// Runs [`SEED_SQL`] via `client.batch_execute`. Intended for test
/// setups and single-node development installs; production deployments
/// typically seed node_id at provisioning time rather than calling this.
pub async fn seed_default_node<C>(client: &C) -> Result<(), tokio_postgres::Error>
where
    C: GenericClient + ?Sized,
{
    client.batch_execute(SEED_SQL).await
}

// --- flip/generator/backfill install helpers for v0.3.0 ---

/// Installs the asc↔desc flip functions. Idempotent.
pub async fn install_flip_functions<C>(client: &C) -> Result<(), tokio_postgres::Error>
where
    C: GenericClient + ?Sized,
{
    client.batch_execute(DESC_FLIP_SQL).await
}

/// Installs `heerid_next` / `ranjid_next` single-row wrappers plus the
/// `*_next_desc` generators. Requires the base `generate_ids` /
/// `generate_ranj_ids` functions to already be present (v0.2.x schema).
pub async fn install_desc_generators<C>(client: &C) -> Result<(), tokio_postgres::Error>
where
    C: GenericClient + ?Sized,
{
    client.batch_execute(DESC_GENERATORS_SQL).await
}

/// Installs the `heeranjid_bulk_backfill` procedure. Does not install
/// per-table triggers — those go through `install_autofill_trigger_for_table`.
pub async fn install_migration_support<C>(client: &C) -> Result<(), tokio_postgres::Error>
where
    C: GenericClient + ?Sized,
{
    client.batch_execute(BULK_BACKFILL_SQL).await
}

/// Convenience: runs [`install_flip_functions`], [`install_desc_generators`],
/// and [`install_migration_support`] in order. Idempotent.
pub async fn install_all_desc_support<C>(client: &C) -> Result<(), tokio_postgres::Error>
where
    C: GenericClient + ?Sized,
{
    install_flip_functions(client).await?;
    install_desc_generators(client).await?;
    install_migration_support(client).await?;
    Ok(())
}

// --- per-table autofill trigger helpers for v0.3.0 ---

/// A source/destination column pair for a per-table autofill trigger.
///
/// `src` is the existing ascending-sort column, `dst` is the descending
/// sibling that the trigger keeps in sync. Multiple pairs may be passed
/// in one call to [`install_autofill_trigger_for_table`]; each gets an
/// independent `IF` branch in both the INSERT and UPDATE arms.
pub struct ColumnPair<'a> {
    /// Source (ascending) column name.
    pub src: &'a str,
    /// Destination (descending) column name.
    pub dst: &'a str,
}

/// Which flip family the generated trigger should call:
/// `heerid_to_desc` (64-bit) or `ranjid_to_desc` (128-bit / bytea).
pub enum IdKind {
    /// HeerId (64-bit ascending -> descending via `heerid_to_desc`).
    Heer,
    /// RanjId (128-bit ascending -> descending via `ranjid_to_desc`).
    Ranj,
}

impl IdKind {
    /// Name of the Postgres function that flips an ascending id to its
    /// descending sibling.
    fn flip_fn(&self) -> &'static str {
        match self {
            IdKind::Heer => "heerid_to_desc",
            IdKind::Ranj => "ranjid_to_desc",
        }
    }
}

/// Validates that `s` is a safe, unquoted Postgres identifier.
///
/// Accepts `^[A-Za-z_][A-Za-z0-9_]*$` up to 63 bytes (Postgres
/// `NAMEDATALEN - 1`). Rejects everything else — including strings
/// containing `;`, `"`, `'`, whitespace, or `--`, which would open an
/// SQL-injection channel when interpolated into the trigger DDL that
/// [`install_autofill_trigger_for_table`] generates.
///
/// This is a belt-and-braces check: callers should never pass untrusted
/// input here, but if they do, we fail closed rather than exec arbitrary
/// DDL.
fn validate_ident(s: &str) -> Result<(), SchemaError> {
    // 63 = Postgres NAMEDATALEN - 1.
    if s.is_empty() || s.len() > 63 {
        return Err(SchemaError::InvalidIdentifier(s.to_string()));
    }
    let mut chars = s.chars();
    let first = chars.next().expect("non-empty checked above");
    if !(first.is_ascii_alphabetic() || first == '_') {
        return Err(SchemaError::InvalidIdentifier(s.to_string()));
    }
    if !chars.all(|c| c.is_ascii_alphanumeric() || c == '_') {
        return Err(SchemaError::InvalidIdentifier(s.to_string()));
    }
    Ok(())
}

/// Installs a `BEFORE INSERT OR UPDATE` trigger that keeps descending
/// sibling columns in sync with their ascending sources.
///
/// The generated function is named `zzz_<table>_autofill_desc`. The
/// `zzz_` prefix is load-bearing — Postgres fires `BEFORE` triggers in
/// alphabetical order, and forcing this one to run last means any
/// user-defined trigger that wants to adjust `NEW.<src>` gets to do so
/// before the descending sibling is computed from it (spec §5.1).
///
/// For each `ColumnPair { src, dst }`:
/// * On `INSERT`: if `NEW.<dst> IS NULL`, fill it from `flip(NEW.<src>)`.
/// * On `UPDATE`: if `NEW.<src>` changed, recompute `NEW.<dst>`; else if
///   `NEW.<dst> IS NULL`, fill it from `flip(NEW.<src>)`.
///
/// Idempotent — re-running replaces both the function and the trigger.
///
/// # Errors
///
/// Returns [`SchemaError::InvalidIdentifier`] if `table` or any
/// `ColumnPair` field fails identifier validation (strict
/// `^[A-Za-z_][A-Za-z0-9_]*$`, max 63 chars);
/// [`SchemaError::TokioPostgres`] if the DDL `batch_execute` fails.
///
/// # Panics
///
/// Panics if `pairs` is empty — at least one pair is required.
pub async fn install_autofill_trigger_for_table<C>(
    client: &C,
    table: &str,
    pairs: &[ColumnPair<'_>],
    kind: IdKind,
) -> Result<(), SchemaError>
where
    C: GenericClient + ?Sized,
{
    assert!(!pairs.is_empty(), "at least one ColumnPair required");
    validate_ident(table)?;
    for p in pairs {
        validate_ident(p.src)?;
        validate_ident(p.dst)?;
    }

    let flip_fn = kind.flip_fn();
    let fn_name = format!("zzz_{}_autofill_desc", table);
    let trig_name = &fn_name;

    let mut insert_body = String::new();
    let mut update_body = String::new();
    for p in pairs {
        use std::fmt::Write as _;
        writeln!(
            insert_body,
            "        IF NEW.{dst} IS NULL THEN NEW.{dst} := {flip}(NEW.{src}); END IF;",
            dst = p.dst,
            flip = flip_fn,
            src = p.src,
        )
        .expect("write! to String cannot fail");
        write!(
            update_body,
            "        IF NEW.{src} IS DISTINCT FROM OLD.{src} THEN\n\
             \x20           NEW.{dst} := {flip}(NEW.{src});\n\
             \x20       ELSIF NEW.{dst} IS NULL THEN\n\
             \x20           NEW.{dst} := {flip}(NEW.{src});\n\
             \x20       END IF;\n",
            src = p.src,
            dst = p.dst,
            flip = flip_fn,
        )
        .expect("write! to String cannot fail");
    }

    let sql = format!(
        r#"
CREATE OR REPLACE FUNCTION {fn_name}() RETURNS trigger AS $body$
BEGIN
    IF TG_OP = 'INSERT' THEN
{insert_body}    ELSIF TG_OP = 'UPDATE' THEN
{update_body}    END IF;
    RETURN NEW;
END;
$body$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS {trig_name} ON {table};
CREATE TRIGGER {trig_name}
    BEFORE INSERT OR UPDATE ON {table}
    FOR EACH ROW EXECUTE FUNCTION {fn_name}();
"#,
        fn_name = fn_name,
        trig_name = trig_name,
        insert_body = insert_body,
        update_body = update_body,
        table = table,
    );

    client.batch_execute(&sql).await?;
    Ok(())
}

/// Removes the autofill trigger and underlying function installed by
/// [`install_autofill_trigger_for_table`] for `table`.
///
/// Safe to call when the trigger is not present (uses `IF EXISTS`).
///
/// # Errors
///
/// Returns [`SchemaError::InvalidIdentifier`] if `table` fails
/// identifier validation (strict `^[A-Za-z_][A-Za-z0-9_]*$`, max 63
/// chars); [`SchemaError::TokioPostgres`] on underlying DDL failure.
pub async fn drop_autofill_trigger_for_table<C>(client: &C, table: &str) -> Result<(), SchemaError>
where
    C: GenericClient + ?Sized,
{
    validate_ident(table)?;
    let fn_name = format!("zzz_{}_autofill_desc", table);
    let sql = format!(
        "DROP TRIGGER IF EXISTS {name} ON {tbl};\n\
         DROP FUNCTION IF EXISTS {name}() CASCADE;\n",
        name = fn_name,
        tbl = table,
    );
    client.batch_execute(&sql).await?;
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn validate_ident_rejects_sql_injection_attempts() {
        // Classic statement-terminator injection.
        assert!(validate_ident("tbl; DROP TABLE users").is_err());
        // Quote-based injection.
        assert!(validate_ident("\"quoted\"").is_err());
        assert!(validate_ident("it's").is_err());
        // Comment-based injection.
        assert!(validate_ident("tbl--").is_err());
        // Whitespace.
        assert!(validate_ident("two words").is_err());
        assert!(validate_ident("tab\tname").is_err());
        assert!(validate_ident("nl\nname").is_err());
        // Empty.
        assert!(validate_ident("").is_err());
        // Over 63 chars.
        assert!(validate_ident(&"x".repeat(64)).is_err());
        // Starts with a digit.
        assert!(validate_ident("1tbl").is_err());
        // Stray punctuation that isn't ; " ' but would still be wrong.
        assert!(validate_ident("tbl-name").is_err());
        assert!(validate_ident("tbl.name").is_err());
    }

    #[test]
    fn validate_ident_accepts_valid_identifiers() {
        assert!(validate_ident("tbl").is_ok());
        assert!(validate_ident("_internal_thing").is_ok());
        assert!(validate_ident("events_v2").is_ok());
        assert!(validate_ident("A").is_ok());
        assert!(validate_ident("_").is_ok());
        assert!(validate_ident("id_desc").is_ok());
        // Exactly 63 chars is OK (NAMEDATALEN - 1).
        assert!(validate_ident(&"a".repeat(63)).is_ok());
    }

    #[test]
    fn id_kind_flip_fn_matches_sql_names() {
        assert_eq!(IdKind::Heer.flip_fn(), "heerid_to_desc");
        assert_eq!(IdKind::Ranj.flip_fn(), "ranjid_to_desc");
    }
}