ggsql 0.4.1

A declarative visualization language that extends SQL with powerful data visualization capabilities.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
//! ADBC (Arrow Database Connectivity) reader.
//!
//! Generic over any concrete ADBC `Driver` implementation. Verified against
//! two drivers in this crate's tests:
//!
//! - `adbc_datafusion` — pure-Rust, in-process. Used for routing and
//!   conversion unit tests where loading a native driver isn't worth the
//!   build complexity.
//! - `adbc_driver_duckdb` (loaded via `adbc_driver_manager::ManagedDriver`)
//!   — a real ADBC C driver, used for an equivalence suite that compares
//!   `AdbcReader<DuckDB>` output against ggsql's existing `DuckDBReader`.
//!
//! The `Reader` trait takes `&self`, but ADBC's `Statement` API takes
//! `&mut self`. We bridge this with `RefCell` around the `Connection`,
//! mirroring the interior-mutability pattern used by `OdbcReader`.

use crate::reader::{AnsiDialect, Reader, SqlDialect};
use crate::{DataFrame, GgsqlError, Result};
use adbc_core::sync::{Connection, Database, Driver};
use std::cell::RefCell;
use std::collections::HashSet;

pub struct AdbcReader<D: Driver> {
    // Driver must stay alive as long as the Database does (per ADBC contract).
    _driver: D,
    // Database must stay alive as long as the Connection does.
    _database: D::DatabaseType,
    // Connection is what Statements are made from. Wrapped in RefCell because
    // new_statement / set_sql_query / execute all take &mut, but Reader::execute_sql
    // takes &self.
    connection: RefCell<<D::DatabaseType as Database>::ConnectionType>,
    dialect: Box<dyn SqlDialect + Send>,
    registered_tables: RefCell<HashSet<String>>,
}

impl<D: Driver> AdbcReader<D> {
    /// Construct an `AdbcReader` with an explicit `SqlDialect`. Use this to
    /// plug in backend-specific dialects (e.g. a TrinoDialect, SnowflakeDialect)
    /// when the reader is pointed at that backend.
    pub fn with_dialect(driver: D, dialect: Box<dyn SqlDialect + Send>) -> Result<Self> {
        Self::new(driver, dialect)
    }

    /// Create a new `AdbcReader` from an already-initialized ADBC driver.
    ///
    /// Callers are responsible for any pre-init `Database` / `Connection`
    /// options. For convenience, use `from_driver` for the common case
    /// with the ANSI dialect, or pass a custom `SqlDialect`
    /// (e.g. a Trino / Snowflake dialect) here directly.
    pub fn new(mut driver: D, dialect: Box<dyn SqlDialect + Send>) -> Result<Self> {
        let database = driver
            .new_database()
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_database failed: {}", e)))?;
        let connection = database
            .new_connection()
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_connection failed: {}", e)))?;
        Ok(Self {
            _driver: driver,
            _database: database,
            connection: RefCell::new(connection),
            dialect,
            registered_tables: RefCell::new(HashSet::new()),
        })
    }

    /// Create a new `AdbcReader`, passing pre-init options to the underlying
    /// `Database`. Use this when the driver requires URI / credentials / RPC
    /// header options to be set before the first connection (e.g. Flight SQL
    /// or other auth-required backends).
    pub fn new_with_database_opts(
        mut driver: D,
        dialect: Box<dyn SqlDialect + Send>,
        opts: impl IntoIterator<
            Item = (
                adbc_core::options::OptionDatabase,
                adbc_core::options::OptionValue,
            ),
        >,
    ) -> Result<Self> {
        let database = driver.new_database_with_opts(opts).map_err(|e| {
            GgsqlError::ReaderError(format!("ADBC new_database_with_opts failed: {}", e))
        })?;
        let connection = database
            .new_connection()
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_connection failed: {}", e)))?;
        Ok(Self {
            _driver: driver,
            _database: database,
            connection: RefCell::new(connection),
            dialect,
            registered_tables: RefCell::new(HashSet::new()),
        })
    }

    /// Convenience: construct with the ANSI dialect. Good default for
    /// standards-compliant backends; use `new` directly to plug in a
    /// backend-specific dialect.
    pub fn from_driver(driver: D) -> Result<Self> {
        Self::new(driver, Box::new(AnsiDialect))
    }
}

use adbc_core::sync::Statement;
use arrow::record_batch::RecordBatch;

impl<D: Driver + 'static> Reader for AdbcReader<D>
where
    D::DatabaseType: 'static,
    <D::DatabaseType as Database>::ConnectionType: 'static,
{
    fn execute_sql(&self, sql: &str) -> Result<DataFrame> {
        use arrow::array::RecordBatchReader as _;

        // Drain the `RecordBatchReader` *inside* the connection-borrow scope
        // so `stmt` and the `RefMut<Connection>` stay alive while batches are
        // streamed from the server. The `FlightSQL` driver's reader holds a
        // gRPC stream whose context is tied to the Statement; if `stmt` drops
        // before iteration completes, the first `DoGet` call cancels with
        // `Canceled; DoGet: endpoint 0: []`. Other ADBC drivers (DataFusion,
        // etc.) return self-sufficient readers, but paying for an extra early
        // release on those is worthwhile to keep a single correct code path.
        let (schema, batches) = {
            let mut conn = self.connection.try_borrow_mut().map_err(|_| {
                GgsqlError::ReaderError(
                    "AdbcReader is already mutably borrowed — another \
                     `execute_sql`/`register`/`unregister` is in progress \
                     on this reader"
                        .into(),
                )
            })?;
            let mut stmt = conn
                .new_statement()
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_statement: {}", e)))?;
            stmt.set_sql_query(sql)
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC set_sql_query: {}", e)))?;
            let reader = stmt
                .execute()
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC execute: {}", e)))?;

            // Capture the declared result schema before draining batches —
            // the reader carries it even when zero batches are produced, and
            // we need it to preserve column names on empty results.
            let schema = reader.schema();
            let mut batches: Vec<RecordBatch> = Vec::new();
            for batch in reader {
                batches.push(batch.map_err(|e| {
                    GgsqlError::ReaderError(format!("ADBC RecordBatch iter: {}", e))
                })?);
            }
            (schema, batches)
        };

        let merged = if batches.is_empty() {
            RecordBatch::new_empty(schema)
        } else if batches.len() == 1 {
            batches.into_iter().next().unwrap()
        } else {
            arrow::compute::concat_batches(&schema, &batches)
                .map_err(|e| GgsqlError::ReaderError(format!("concat_batches: {}", e)))?
        };
        Ok(DataFrame::from_record_batch(merged))
    }

    fn register(&self, name: &str, df: DataFrame, replace: bool) -> Result<()> {
        super::validate_table_name(name)?;

        use adbc_core::options::{IngestMode, OptionStatement, OptionValue};
        use adbc_core::Optionable;

        if df.height() == 0 {
            return Err(GgsqlError::ReaderError(
                "AdbcReader::register: empty DataFrame not supported".into(),
            ));
        }
        let batch = df.into_inner();

        let mut conn = self.connection.try_borrow_mut().map_err(|_| {
            GgsqlError::ReaderError(
                "AdbcReader::register called re-entrantly — another operation \
                 is still holding the connection on this reader"
                    .into(),
            )
        })?;

        // Bulk-insert path: CREATE TABLE via SQL DDL, then for each batch set
        // `TargetTable` + `IngestMode::Append` + `bind(batch)` +
        // `execute_update()`. We do the CREATE ourselves (rather than relying
        // on `IngestMode::Create`) so we control the column types via the
        // `SqlDialect` and so registers behave identically across drivers
        // with varying ingest-option support — in particular,
        // `adbc_datafusion` 0.23 has `bind_stream` as `todo!()` and rejects
        // the `IngestMode` option key (`set_option` returns `NotFound`),
        // which is silently tolerated below.
        let schema = batch.schema();
        if replace {
            let drop_sql = format!("DROP TABLE IF EXISTS {}", crate::naming::quote_ident(name));
            let mut drop_stmt = conn
                .new_statement()
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_statement: {}", e)))?;
            drop_stmt
                .set_sql_query(&drop_sql)
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC set_sql_query DROP: {}", e)))?;
            drop_stmt
                .execute_update()
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC execute_update DROP: {}", e)))?;
        }

        let create_sql = create_table_sql(name, &schema, &*self.dialect)?;
        let mut create_stmt = conn
            .new_statement()
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_statement: {}", e)))?;
        create_stmt
            .set_sql_query(&create_sql)
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC set_sql_query CREATE: {}", e)))?;
        create_stmt
            .execute_update()
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC execute_update CREATE: {}", e)))?;

        // Track the table in our set as soon as CREATE succeeds — BEFORE the
        // potentially-multi-batch ingest loop. If a bind or execute_update
        // fails mid-way, the (partial) table still exists on the server;
        // having the name tracked lets the caller `unregister()` to clean
        // up, and a subsequent `register(name, ..., replace=true)` will
        // drop-and-recreate. Without this, a mid-ingest failure would leave
        // an orphan table the reader can't reach.
        self.registered_tables.borrow_mut().insert(name.to_string());

        {
            let mut stmt = conn
                .new_statement()
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_statement: {}", e)))?;
            stmt.set_option(
                OptionStatement::TargetTable,
                OptionValue::String(name.to_string()),
            )
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC set TargetTable: {}", e)))?;
            // Tell the driver this is an append into the table we just
            // CREATEd above. Compliant ADBC drivers (e.g. the Apache SQLite
            // driver) default `IngestMode` to `Create` when only `TargetTable`
            // is set, which would then fail because the table already exists.
            // DataFusion 0.23 doesn't expose this option key and returns
            // `Status::NotFound` from `set_option`; that's expected for
            // DataFusion's bind path (it appends by default), so swallow it
            // and continue rather than failing register().
            if let Err(e) = stmt.set_option(
                OptionStatement::IngestMode,
                OptionValue::from(IngestMode::Append),
            ) {
                if e.status != adbc_core::error::Status::NotFound {
                    return Err(GgsqlError::ReaderError(format!(
                        "ADBC set IngestMode=Append: {}",
                        e
                    )));
                }
            }
            stmt.bind(batch)
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC bind: {}", e)))?;
            stmt.execute_update().map_err(|e| {
                GgsqlError::ReaderError(format!(
                    "ADBC execute_update: {}\
                     table left on server; call unregister() to drop it \
                     or register() with replace=true to retry",
                    e
                ))
            })?;
        }

        Ok(())
    }

    fn unregister(&self, name: &str) -> Result<()> {
        if !self.registered_tables.borrow().contains(name) {
            return Err(GgsqlError::ReaderError(format!(
                "Table '{}' was not registered via this reader",
                name
            )));
        }
        let sql = format!("DROP TABLE IF EXISTS {}", crate::naming::quote_ident(name));
        // Ignore the returned DataFrame — DROP TABLE has no result rows.
        self.execute_sql(&sql)?;
        self.registered_tables.borrow_mut().remove(name);
        Ok(())
    }

    fn execute(&self, query: &str) -> Result<crate::reader::Spec> {
        crate::reader::execute_with_reader(self, query)
    }

    fn dialect(&self) -> &dyn SqlDialect {
        &*self.dialect
    }
}

/// Build a `CREATE TABLE <name> (col1 TYPE, col2 TYPE, ...)` statement from
/// an Arrow schema, using the reader's `SqlDialect` for type names.
///
/// Used by `register()` to create the destination table before binding
/// batches with `IngestMode::Append`; see the `register` impl for context.
fn create_table_sql(
    name: &str,
    schema: &arrow::datatypes::Schema,
    dialect: &dyn SqlDialect,
) -> Result<String> {
    use arrow::datatypes::DataType;

    let mut cols: Vec<String> = Vec::with_capacity(schema.fields().len());
    for field in schema.fields() {
        let ty_name: &str = match field.data_type() {
            DataType::Boolean => dialect.boolean_type_name().unwrap_or("BOOLEAN"),
            DataType::Int8
            | DataType::Int16
            | DataType::Int32
            | DataType::Int64
            | DataType::UInt8
            | DataType::UInt16
            | DataType::UInt32
            | DataType::UInt64 => dialect.integer_type_name().unwrap_or("BIGINT"),
            DataType::Float16 | DataType::Float32 | DataType::Float64 => {
                dialect.number_type_name().unwrap_or("DOUBLE PRECISION")
            }
            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
                dialect.string_type_name().unwrap_or("VARCHAR")
            }
            DataType::Date32 | DataType::Date64 => dialect.date_type_name().unwrap_or("DATE"),
            DataType::Timestamp(_, _) => dialect.datetime_type_name().unwrap_or("TIMESTAMP"),
            DataType::Time32(_) | DataType::Time64(_) => dialect.time_type_name().unwrap_or("TIME"),
            other => {
                return Err(GgsqlError::ReaderError(format!(
                    "AdbcReader::register: unsupported Arrow type for column '{}': {:?}",
                    field.name(),
                    other
                )));
            }
        };
        cols.push(format!(
            "{} {}",
            crate::naming::quote_ident(field.name()),
            ty_name
        ));
    }

    Ok(format!(
        "CREATE TABLE {} ({})",
        crate::naming::quote_ident(name),
        cols.join(", ")
    ))
}

#[cfg(test)]
mod tests {
    use super::*;
    use adbc_datafusion::DataFusionDriver;

    /// Construct a reader over an in-process DataFusion ADBC driver.
    /// DataFusion starts empty; callers register tables via the reader's
    /// `register()` method (added in Task 4) or via raw SQL DDL.
    fn fixture_reader() -> AdbcReader<DataFusionDriver> {
        AdbcReader::from_driver(DataFusionDriver::new(None)).expect("datafusion init")
    }

    #[test]
    fn execute_sql_returns_scalar_result() {
        use crate::array_util::as_i64;
        let reader = fixture_reader();
        let df = reader
            .execute_sql("SELECT 1 AS one, 'hello' AS greeting")
            .expect("query ok");
        assert_eq!(df.height(), 1);
        assert_eq!(df.width(), 2);
        let one = as_i64(df.column("one").unwrap()).unwrap().value(0);
        assert_eq!(one, 1);
    }

    #[test]
    fn register_then_query_roundtrip() {
        use crate::array_util::as_i64;
        use crate::df;

        let reader = fixture_reader();
        let df = df! {
            "x" => vec![1i64, 2, 3],
            "y" => vec!["a", "b", "c"],
        }
        .unwrap();
        reader.register("t", df, false).expect("register ok");

        let out = reader
            .execute_sql("SELECT COUNT(*) AS n FROM t")
            .expect("count ok");
        let n = as_i64(out.column("n").unwrap()).unwrap().value(0);
        assert_eq!(n, 3);
    }

    #[test]
    fn unregister_removes_table() {
        use crate::df;

        let reader = fixture_reader();
        let df = df! { "x" => vec![1i64] }.unwrap();
        reader.register("tmp", df, false).unwrap();

        // First unregister should succeed: table was registered via this reader.
        reader.unregister("tmp").expect("unregister ok");

        // Second unregister must fail: the name was removed from
        // registered_tables, so the guard in unregister() triggers.
        // This verifies the bookkeeping without triggering the
        // adbc_datafusion 0.23 Statement::execute panic that happens on
        // `SELECT * FROM <dropped-table>` (the driver .unwrap()s a DataFusion
        // planning error at lib.rs:913 instead of returning a proper ADBC
        // error — captured in Task 9 findings).
        let err = reader.unregister("tmp").unwrap_err();
        assert!(matches!(err, GgsqlError::ReaderError(_)));
    }

    #[test]
    fn with_dialect_plumbs_custom_dialect_through() {
        // Dummy dialect that overrides a recognizable method so we can verify
        // the reader actually stored and exposes our dialect rather than the
        // default AnsiDialect.
        struct ShoutyDialect;
        impl super::SqlDialect for ShoutyDialect {
            fn integer_type_name(&self) -> Option<&str> {
                Some("SHOUTY_BIGINT")
            }
        }

        let reader = AdbcReader::with_dialect(DataFusionDriver::new(None), Box::new(ShoutyDialect))
            .expect("reader");

        // The Reader trait's dialect() accessor should return our ShoutyDialect.
        assert_eq!(reader.dialect().integer_type_name(), Some("SHOUTY_BIGINT"));
    }

    #[test]
    #[ignore = "ggsql's execute pipeline issues `CREATE OR REPLACE TEMP TABLE` for layer/stat \
                materialization, which adbc_datafusion 0.23 rejects with `NotImplemented(\"Temporary \
                tables not supported\")`. The full pipeline works against any driver that supports \
                TEMP TABLE (DuckDB, Trino, etc.) — see the equivalence tests for that path."]
    fn reader_executes_full_ggsql_visualise_query() {
        use crate::df;

        let reader = fixture_reader();
        let data = df! {
            "date"   => vec!["2024-01-01", "2024-01-02", "2024-01-03"],
            "value"  => vec![10i64, 20, 30],
            "region" => vec!["N", "S", "N"],
        }
        .unwrap();
        reader.register("sales", data, false).unwrap();

        let query = r#"
            SELECT date, value, region FROM sales WHERE value > 5
            VISUALISE date AS x, value AS y, region AS color
            DRAW line
        "#;
        let spec = reader.execute(query).expect("ggsql execute ok");
        let meta = spec.metadata();
        // Full pipeline verification: SQL executed (3 rows after WHERE),
        // VISUALISE parsed, plot resolved with 1 layer.
        assert_eq!(meta.rows, 3);
        assert_eq!(meta.layer_count, 1);
        // The `columns` list reports the *transformed aesthetic* column names
        // (e.g. x -> pos1, y -> pos2, color -> stroke on a line layer) not the
        // raw SQL column names. See `test_execute_metadata` in reader/mod.rs
        // for the same convention.
        assert!(
            meta.columns.iter().any(|c| c == "pos1"),
            "expected pos1 (x aesthetic) in columns: {:?}",
            meta.columns
        );
        assert!(
            meta.columns.iter().any(|c| c == "pos2"),
            "expected pos2 (y aesthetic) in columns: {:?}",
            meta.columns
        );
        assert!(
            meta.columns.iter().any(|c| c == "stroke"),
            "expected stroke (color aesthetic on line) in columns: {:?}",
            meta.columns
        );
    }

    #[test]
    fn execute_sql_handles_multi_batch_result() {
        use crate::array_util::as_i64;
        use crate::df;

        // Register a 50k-row frame. DataFusion's default batch size is typically
        // around 8k rows, so the result read-side should produce >1 RecordBatch
        // and exercise the `for batch in reader` loop.
        let reader = fixture_reader();
        let xs: Vec<i64> = (0..50_000i64).collect();
        let df = df! { "x" => xs }.unwrap();
        reader.register("big", df, false).expect("register ok");

        let out = reader
            .execute_sql("SELECT x FROM big ORDER BY x")
            .expect("query ok");
        assert_eq!(out.height(), 50_000);

        // Spot-check: first + last rows should round-trip correctly.
        let col = out.column("x").unwrap();
        let arr = as_i64(col).unwrap();
        assert_eq!(arr.value(0), 0);
        assert_eq!(arr.value(49_999), 49_999);
    }

    #[test]
    fn execute_sql_handles_nulls() {
        use crate::array_util::as_i64;
        use arrow::array::Array;

        let reader = fixture_reader();
        // Use DataFusion DDL to create a table with a NULL.
        reader
            .execute_sql("CREATE TABLE nulltest (x BIGINT) AS VALUES (1), (NULL), (3)")
            .expect("ddl ok");

        let out = reader
            .execute_sql("SELECT x FROM nulltest ORDER BY x NULLS LAST")
            .expect("query ok");
        assert_eq!(out.height(), 3);

        let col = out.column("x").unwrap();
        let arr = as_i64(col).unwrap();
        // Row 2 should be NULL in the returned DataFrame.
        assert!(arr.is_null(2));
        // Rows 0 and 1 are the non-null values.
        assert_eq!(arr.value(0), 1);
        assert_eq!(arr.value(1), 3);
    }

    #[test]
    #[ignore]
    fn bench_register_and_query_100k_rows() {
        use crate::array_util::as_i64;
        use crate::df;
        use std::time::Instant;

        let reader = fixture_reader();
        let n = 100_000i64;
        let xs: Vec<i64> = (0..n).collect();
        let df = df! { "x" => xs }.unwrap();

        let t0 = Instant::now();
        reader.register("big", df, false).unwrap();
        let reg_ms = t0.elapsed().as_millis();

        let t1 = Instant::now();
        let out = reader.execute_sql("SELECT COUNT(*) AS n FROM big").unwrap();
        let q_ms = t1.elapsed().as_millis();

        let n_out = as_i64(out.column("n").unwrap()).unwrap().value(0);
        assert_eq!(n_out, n);
        eprintln!("register 100k rows: {} ms | query: {} ms", reg_ms, q_ms);
    }

    /// Issue #12: `execute_sql` must hold `conn.borrow_mut()` only long enough
    /// to build + execute the Statement — the returned `RecordBatchReader` is
    /// `Box<dyn ... + 'static>`, so iteration must not require the statement
    /// or the connection borrow to stay alive.
    ///
    /// This mirrors the exact borrow pattern `execute_sql` uses post-fix:
    /// borrow, build+execute, drop the borrow, then iterate. It also kicks
    /// off a second `execute_sql` while the first stream is still alive —
    /// only possible if the first borrow was released.
    #[test]
    fn record_batch_reader_outlives_statement_and_allows_second_query() {
        use arrow::array::RecordBatchReader as _;

        let reader = fixture_reader();

        let stream = {
            // Use `try_borrow_mut` here to mirror `execute_sql`'s production
            // path — if this ever panics in the test, the fix in `execute_sql`
            // has regressed and the borrow scope has crept wider again.
            let mut conn = reader
                .connection
                .try_borrow_mut()
                .expect("fresh reader should allow a mutable borrow");
            let mut stmt = conn.new_statement().expect("new_statement");
            stmt.set_sql_query("SELECT 1 AS v UNION ALL SELECT 2 UNION ALL SELECT 3")
                .expect("set_sql_query");
            stmt.execute().expect("execute")
            // `stmt` and the `RefMut<Connection>` both drop here.
        };

        // With the borrow released, another query on the same reader must
        // work while `stream` is still live.
        let df2 = reader
            .execute_sql("SELECT 42 AS answer")
            .expect("second query");
        assert_eq!(df2.height(), 1);

        // `stream` must still iterate — it does not depend on `stmt` or the
        // original borrow. `schema()` is called before `collect()` consumes
        // the reader.
        let schema = stream.schema();
        let batches = stream
            .collect::<std::result::Result<Vec<_>, _>>()
            .expect("drain");
        let total: usize = batches.iter().map(|b| b.num_rows()).sum();
        assert_eq!(total, 3);
        assert_eq!(schema.fields().len(), 1);
        assert_eq!(schema.field(0).name(), "v");
    }

    #[test]
    fn execute_sql_handles_empty_result_with_schema() {
        let reader = fixture_reader();
        let df = reader
            .execute_sql("SELECT 1 AS a, 'x' AS b WHERE false")
            .expect("query ok");
        // The schema is preserved on zero-batch results: we now pull the
        // declared schema off the `RecordBatchReader` *before* draining
        // batches and hand it to the IPC bridge so an empty result still
        // produces a 0-row DataFrame with the correct columns.
        assert_eq!(df.height(), 0);
        assert_eq!(df.width(), 2);
        let names: Vec<String> = df
            .get_column_names()
            .iter()
            .map(|s| s.to_string())
            .collect();
        assert!(names.contains(&"a".to_string()));
        assert!(names.contains(&"b".to_string()));
    }
}

#[cfg(all(test, feature = "sqlite"))]
mod equivalence_tests {
    //! Equivalence tests: `AdbcReader<sqlite via ManagedDriver>` vs ggsql's
    //! `SqliteReader` on the same query against the same SQLite DB. Validates
    //! correctness of the ADBC abstraction's routing, type bridging, and
    //! ingest paths against a real, fully-functional ADBC driver.
    //!
    //! Skipped by default (gated `#[ignore]`). To run them:
    //!
    //! 1. Install dbc: `curl -LsSf https://dbc.columnar.tech/install.sh | sh`
    //! 2. Install the SQLite driver: `dbc install sqlite`
    //! 3. Run: `cargo test --features "adbc sqlite" -- --ignored equivalence`
    //!
    //! `dbc install` writes the driver to a manifest location that
    //! `ManagedDriver::load_from_name("sqlite", ...)` discovers automatically
    //! (on macOS: `~/Library/Application Support/ADBC/Drivers/sqlite.toml`).
    //!
    //! Why SQLite (and not DuckDB) as the equivalence oracle: `libduckdb` is
    //! distributed as a bundled-static archive, so it can't be loaded as the
    //! shared library that `ManagedDriver` requires. The Apache-published
    //! SQLite ADBC driver ships as `libadbc_driver_sqlite.dylib` and is the
    //! reference C-driver path for round-tripping through `adbc_driver_manager`.

    use crate::reader::sqlite::SqliteDialect;
    use crate::reader::{AdbcReader, Reader, SqliteReader};
    use adbc_core::options::{AdbcVersion, OptionDatabase, OptionValue};
    use adbc_core::LOAD_FLAG_DEFAULT;
    use adbc_driver_manager::ManagedDriver;
    use tempfile::NamedTempFile;

    /// Construct an `AdbcReader` pointed at a specific SQLite file.
    /// Both readers in each test point at the SAME file so equivalence is
    /// over the same physical database.
    fn make_adbc_reader(db_path: &str) -> AdbcReader<ManagedDriver> {
        let driver = ManagedDriver::load_from_name(
            "sqlite",
            None,
            AdbcVersion::V110,
            LOAD_FLAG_DEFAULT,
            None,
        )
        .expect("`dbc install sqlite` first; see module docs");
        let dialect: Box<dyn crate::reader::SqlDialect + Send> = Box::new(SqliteDialect);
        AdbcReader::new_with_database_opts(
            driver,
            dialect,
            std::iter::once((
                OptionDatabase::Uri,
                OptionValue::String(format!("file:{}", db_path)),
            )),
        )
        .expect("construct AdbcReader<sqlite>")
    }

    fn make_sqlite_reader(db_path: &str) -> SqliteReader {
        SqliteReader::from_connection_string(&format!("sqlite://{}", db_path))
            .expect("SqliteReader at the same path")
    }

    /// Compare two DataFrames by schema (field names + types) and by
    /// per-column Arrow array contents. We don't use a blanket
    /// `assert_eq!(df, df)` because `DataFrame` doesn't implement `PartialEq`;
    /// going through schema + per-column equality is also more diagnostic
    /// when one of them diverges.
    fn assert_dataframes_equal(
        adbc_df: &crate::DataFrame,
        sqlite_df: &crate::DataFrame,
        ctx: &str,
    ) {
        let adbc_schema = adbc_df.schema();
        let sqlite_schema = sqlite_df.schema();
        assert_eq!(
            adbc_schema.fields().len(),
            sqlite_schema.fields().len(),
            "{}: column count mismatch (adbc={}, sqlite={})",
            ctx,
            adbc_schema.fields().len(),
            sqlite_schema.fields().len(),
        );
        for (i, (a, s)) in adbc_schema
            .fields()
            .iter()
            .zip(sqlite_schema.fields().iter())
            .enumerate()
        {
            assert_eq!(
                a.name(),
                s.name(),
                "{}: column {} name mismatch (adbc='{}', sqlite='{}')",
                ctx,
                i,
                a.name(),
                s.name(),
            );
            assert_eq!(
                a.data_type(),
                s.data_type(),
                "{}: column '{}' type mismatch (adbc={:?}, sqlite={:?})",
                ctx,
                a.name(),
                a.data_type(),
                s.data_type(),
            );
        }
        assert_eq!(
            adbc_df.height(),
            sqlite_df.height(),
            "{}: row count mismatch (adbc={}, sqlite={})",
            ctx,
            adbc_df.height(),
            sqlite_df.height(),
        );
        for field in adbc_schema.fields() {
            let a = adbc_df.column(field.name()).unwrap();
            let s = sqlite_df.column(field.name()).unwrap();
            assert_eq!(
                a.as_ref(),
                s.as_ref(),
                "{}: column '{}' data mismatch",
                ctx,
                field.name(),
            );
        }
    }

    #[test]
    #[ignore = "requires `dbc install sqlite`; see module docs"]
    fn equiv_simple_select() {
        let db = NamedTempFile::new().unwrap();
        let db_path = db.path().to_str().unwrap();
        let adbc = make_adbc_reader(db_path);
        let direct = make_sqlite_reader(db_path);
        let sql = "SELECT 1 AS x, 'hello' AS y, 3.14 AS z";
        let a = adbc.execute_sql(sql).unwrap();
        let d = direct.execute_sql(sql).unwrap();
        assert_dataframes_equal(&a, &d, "simple select");
    }

    #[test]
    #[ignore = "requires `dbc install sqlite`; see module docs"]
    fn equiv_register_and_query() {
        // Register through the ADBC reader (exercises the standard ADBC
        // bulk-ingest path), then read back through SqliteReader (talks to
        // rusqlite directly against the same file) AND through the ADBC
        // reader. Both should agree.
        let db = NamedTempFile::new().unwrap();
        let db_path = db.path().to_str().unwrap();
        let adbc = make_adbc_reader(db_path);
        let df = crate::df! {
            "x" => vec![1i64, 2, 3, 4, 5],
            "y" => vec![10i64, 20, 30, 40, 50],
        }
        .unwrap();
        adbc.register("t", df, false).unwrap();

        // Open the SqliteReader AFTER the ADBC reader has CREATEd + ingested,
        // so its `Connection::open` sees the on-disk schema written by ADBC.
        let direct = make_sqlite_reader(db_path);

        let sql = "SELECT x, y, x*y AS xy FROM t WHERE y > 15 ORDER BY x";
        let a = adbc.execute_sql(sql).unwrap();
        let d = direct.execute_sql(sql).unwrap();
        assert_dataframes_equal(&a, &d, "register + filter + projection");
    }

    #[test]
    #[ignore = "requires `dbc install sqlite`; see module docs"]
    fn equiv_nulls() {
        // Mix nulls with typed values so both readers infer the same type.
        // (SqliteReader's per-row type inference falls back to Utf8 when a
        // column is *exclusively* NULL, while ADBC carries through the
        // declared INTEGER from the projection metadata. That's a
        // SqliteReader limitation, not an AdbcReader bug, so we steer
        // around it here — see the divergence note in the PR description.)
        let db = NamedTempFile::new().unwrap();
        let db_path = db.path().to_str().unwrap();
        let adbc = make_adbc_reader(db_path);
        let direct = make_sqlite_reader(db_path);
        // SQLite doesn't accept `VALUES (..) AS t(col, ...)` column-list
        // aliases, so build the source rows with UNION ALL — both readers
        // handle this identically.
        let sql = "SELECT i, s FROM ( \
                SELECT CAST(1 AS INTEGER) AS i, CAST('a' AS TEXT) AS s \
                UNION ALL SELECT NULL, 'b' \
                UNION ALL SELECT 3, NULL \
            ) ORDER BY i";
        let a = adbc.execute_sql(sql).unwrap();
        let d = direct.execute_sql(sql).unwrap();
        assert_dataframes_equal(&a, &d, "mixed null + typed values");
    }
}