ggsql 0.4.1 - Docs.rs

//! ADBC (Arrow Database Connectivity) reader.
//!
//! Generic over any concrete ADBC `Driver` implementation. Verified against
//! two drivers in this crate's tests:
//!
//! - `adbc_datafusion` — pure-Rust, in-process. Used for routing and
//!   conversion unit tests where loading a native driver isn't worth the
//!   build complexity.
//! - `adbc_driver_duckdb` (loaded via `adbc_driver_manager::ManagedDriver`)
//!   — a real ADBC C driver, used for an equivalence suite that compares
//!   `AdbcReader<DuckDB>` output against ggsql's existing `DuckDBReader`.
//!
//! The `Reader` trait takes `&self`, but ADBC's `Statement` API takes
//! `&mut self`. We bridge this with `RefCell` around the `Connection`,
//! mirroring the interior-mutability pattern used by `OdbcReader`.

use crate::reader::{AnsiDialect, Reader, SqlDialect};
use crate::{DataFrame, GgsqlError, Result};
use adbc_core::sync::{Connection, Database, Driver};
use std::cell::RefCell;
use std::collections::HashSet;

pub struct AdbcReader<D: Driver> {
    // Driver must stay alive as long as the Database does (per ADBC contract).
    _driver: D,
    // Database must stay alive as long as the Connection does.
    _database: D::DatabaseType,
    // Connection is what Statements are made from. Wrapped in RefCell because
    // new_statement / set_sql_query / execute all take &mut, but Reader::execute_sql
    // takes &self.
    connection: RefCell<<D::DatabaseType as Database>::ConnectionType>,
    dialect: Box<dyn SqlDialect + Send>,
    registered_tables: RefCell<HashSet<String>>,
}

impl<D: Driver> AdbcReader<D> {
    /// Construct an `AdbcReader` with an explicit `SqlDialect`. Use this to
    /// plug in backend-specific dialects (e.g. a TrinoDialect, SnowflakeDialect)
    /// when the reader is pointed at that backend.
    pub fn with_dialect(driver: D, dialect: Box<dyn SqlDialect + Send>) -> Result<Self> {
        Self::new(driver, dialect)
    }

    /// Create a new `AdbcReader` from an already-initialized ADBC driver.
    ///
    /// Callers are responsible for any pre-init `Database` / `Connection`
    /// options. For convenience, use `from_driver` for the common case
    /// with the ANSI dialect, or pass a custom `SqlDialect`
    /// (e.g. a Trino / Snowflake dialect) here directly.
    pub fn new(mut driver: D, dialect: Box<dyn SqlDialect + Send>) -> Result<Self> {
        let database = driver
            .new_database()
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_database failed: {}", e)))?;
        let connection = database
            .new_connection()
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_connection failed: {}", e)))?;
        Ok(Self {
            _driver: driver,
            _database: database,
            connection: RefCell::new(connection),
            dialect,
            registered_tables: RefCell::new(HashSet::new()),
        })
    }

    /// Create a new `AdbcReader`, passing pre-init options to the underlying
    /// `Database`. Use this when the driver requires URI / credentials / RPC
    /// header options to be set before the first connection (e.g. Flight SQL
    /// or other auth-required backends).
    pub fn new_with_database_opts(
        mut driver: D,
        dialect: Box<dyn SqlDialect + Send>,
        opts: impl IntoIterator<
            Item = (
                adbc_core::options::OptionDatabase,
                adbc_core::options::OptionValue,
            ),
        >,
    ) -> Result<Self> {
        let database = driver.new_database_with_opts(opts).map_err(|e| {
            GgsqlError::ReaderError(format!("ADBC new_database_with_opts failed: {}", e))
        })?;
        let connection = database
            .new_connection()
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_connection failed: {}", e)))?;
        Ok(Self {
            _driver: driver,
            _database: database,
            connection: RefCell::new(connection),
            dialect,
            registered_tables: RefCell::new(HashSet::new()),
        })
    }

    /// Convenience: construct with the ANSI dialect. Good default for
    /// standards-compliant backends; use `new` directly to plug in a
    /// backend-specific dialect.
    pub fn from_driver(driver: D) -> Result<Self> {
        Self::new(driver, Box::new(AnsiDialect))
    }
}

use adbc_core::sync::Statement;
use arrow::record_batch::RecordBatch;

impl<D: Driver + 'static> Reader for AdbcReader<D>
where
    D::DatabaseType: 'static,
    <D::DatabaseType as Database>::ConnectionType: 'static,
{
    fn execute_sql(&self, sql: &str) -> Result<DataFrame> {
        use arrow::array::RecordBatchReader as _;

        // Drain the `RecordBatchReader` *inside* the connection-borrow scope
        // so `stmt` and the `RefMut<Connection>` stay alive while batches are
        // streamed from the server. The `FlightSQL` driver's reader holds a
        // gRPC stream whose context is tied to the Statement; if `stmt` drops
        // before iteration completes, the first `DoGet` call cancels with
        // `Canceled; DoGet: endpoint 0: []`. Other ADBC drivers (DataFusion,
        // etc.) return self-sufficient readers, but paying for an extra early
        // release on those is worthwhile to keep a single correct code path.
        let (schema, batches) = {
            let mut conn = self.connection.try_borrow_mut().map_err(|_| {
                GgsqlError::ReaderError(
                    "AdbcReader is already mutably borrowed — another \
                     `execute_sql`/`register`/`unregister` is in progress \
                     on this reader"
                        .into(),
                )
            })?;
            let mut stmt = conn
                .new_statement()
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_statement: {}", e)))?;
            stmt.set_sql_query(sql)
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC set_sql_query: {}", e)))?;
            let reader = stmt
                .execute()
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC execute: {}", e)))?;

            // Capture the declared result schema before draining batches —
            // the reader carries it even when zero batches are produced, and
            // we need it to preserve column names on empty results.
            let schema = reader.schema();
            let mut batches: Vec<RecordBatch> = Vec::new();
            for batch in reader {
                batches.push(batch.map_err(|e| {
                    GgsqlError::ReaderError(format!("ADBC RecordBatch iter: {}", e))
                })?);
            }
            (schema, batches)
        };

        let merged = if batches.is_empty() {
            RecordBatch::new_empty(schema)
        } else if batches.len() == 1 {
            batches.into_iter().next().unwrap()
        } else {
            arrow::compute::concat_batches(&schema, &batches)
                .map_err(|e| GgsqlError::ReaderError(format!("concat_batches: {}", e)))?
        };
        Ok(DataFrame::from_record_batch(merged))
    }

    fn register(&self, name: &str, df: DataFrame, replace: bool) -> Result<()> {
        super::validate_table_name(name)?;

        use adbc_core::options::{IngestMode, OptionStatement, OptionValue};
        use adbc_core::Optionable;

        if df.height() == 0 {
            return Err(GgsqlError::ReaderError(
                "AdbcReader::register: empty DataFrame not supported".into(),
            ));
        }
        let batch = df.into_inner();

        let mut conn = self.connection.try_borrow_mut().map_err(|_| {
            GgsqlError::ReaderError(
                "AdbcReader::register called re-entrantly — another operation \
                 is still holding the connection on this reader"
                    .into(),
            )
        })?;

        // Bulk-insert path: CREATE TABLE via SQL DDL, then for each batch set
        // `TargetTable` + `IngestMode::Append` + `bind(batch)` +
        // `execute_update()`. We do the CREATE ourselves (rather than relying
        // on `IngestMode::Create`) so we control the column types via the
        // `SqlDialect` and so registers behave identically across drivers
        // with varying ingest-option support — in particular,
        // `adbc_datafusion` 0.23 has `bind_stream` as `todo!()` and rejects
        // the `IngestMode` option key (`set_option` returns `NotFound`),
        // which is silently tolerated below.
        let schema = batch.schema();
        if replace {
            let drop_sql = format!("DROP TABLE IF EXISTS {}", crate::naming::quote_ident(name));
            let mut drop_stmt = conn
                .new_statement()
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_statement: {}", e)))?;
            drop_stmt
                .set_sql_query(&drop_sql)
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC set_sql_query DROP: {}", e)))?;
            drop_stmt
                .execute_update()
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC execute_update DROP: {}", e)))?;
        }

        let create_sql = create_table_sql(name, &schema, &*self.dialect)?;
        let mut create_stmt = conn
            .new_statement()
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_statement: {}", e)))?;
        create_stmt
            .set_sql_query(&create_sql)
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC set_sql_query CREATE: {}", e)))?;
        create_stmt
            .execute_update()
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC execute_update CREATE: {}", e)))?;

        // Track the table in our set as soon as CREATE succeeds — BEFORE the
        // potentially-multi-batch ingest loop. If a bind or execute_update
        // fails mid-way, the (partial) table still exists on the server;
        // having the name tracked lets the caller `unregister()` to clean
        // up, and a subsequent `register(name, ..., replace=true)` will
        // drop-and-recreate. Without this, a mid-ingest failure would leave
        // an orphan table the reader can't reach.
        self.registered_tables.borrow_mut().insert(name.to_string());

        {
            let mut stmt = conn
                .new_statement()
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC new_statement: {}", e)))?;
            stmt.set_option(
                OptionStatement::TargetTable,
                OptionValue::String(name.to_string()),
            )
            .map_err(|e| GgsqlError::ReaderError(format!("ADBC set TargetTable: {}", e)))?;
            // Tell the driver this is an append into the table we just
            // CREATEd above. Compliant ADBC drivers (e.g. the Apache SQLite
            // driver) default `IngestMode` to `Create` when only `TargetTable`
            // is set, which would then fail because the table already exists.
            // DataFusion 0.23 doesn't expose this option key and returns
            // `Status::NotFound` from `set_option`; that's expected for
            // DataFusion's bind path (it appends by default), so swallow it
            // and continue rather than failing register().
            if let Err(e) = stmt.set_option(
                OptionStatement::IngestMode,
                OptionValue::from(IngestMode::Append),
            ) {
                if e.status != adbc_core::error::Status::NotFound {
                    return Err(GgsqlError::ReaderError(format!(
                        "ADBC set IngestMode=Append: {}",
                        e
                    )));
                }
            }
            stmt.bind(batch)
                .map_err(|e| GgsqlError::ReaderError(format!("ADBC bind: {}", e)))?;
            stmt.execute_update().map_err(|e| {
                GgsqlError::ReaderError(format!(
                    "ADBC execute_update: {} — \
                     table left on server; call unregister() to drop it \
                     or register() with replace=true to retry",
                    e
                ))
            })?;
        }

        Ok(())
    }

    fn unregister(&self, name: &str) -> Result<()> {
        if !self.registered_tables.borrow().contains(name) {
            return Err(GgsqlError::ReaderError(format!(
                "Table '{}' was not registered via this reader",
                name
            )));
        }
        let sql = format!("DROP TABLE IF EXISTS {}", crate::naming::quote_ident(name));
        // Ignore the returned DataFrame — DROP TABLE has no result rows.
        self.execute_sql(&sql)?;
        self.registered_tables.borrow_mut().remove(name);
        Ok(())
    }

    fn execute(&self, query: &str) -> Result<crate::reader::Spec> {
        crate::reader::execute_with_reader(self, query)
    }

    fn dialect(&self) -> &dyn SqlDialect {
        &*self.dialect
    }
}

/// Build a `CREATE TABLE <name> (col1 TYPE, col2 TYPE, ...)` statement from
/// an Arrow schema, using the reader's `SqlDialect` for type names.
///
/// Used by `register()` to create the destination table before binding
/// batches with `IngestMode::Append`; see the `register` impl for context.
fn create_table_sql(
    name: &str,
    schema: &arrow::datatypes::Schema,
    dialect: &dyn SqlDialect,
) -> Result<String> {
    use arrow::datatypes::DataType;

    let mut cols: Vec<String> = Vec::with_capacity(schema.fields().len());
    for field in schema.fields() {
        let ty_name: &str = match field.data_type() {
            DataType::Boolean => dialect.boolean_type_name().unwrap_or("BOOLEAN"),
            DataType::Int8
            | DataType::Int16
            | DataType::Int32
            | DataType::Int64
            | DataType::UInt8
            | DataType::UInt16
            | DataType::UInt32
            | DataType::UInt64 => dialect.integer_type_name().unwrap_or("BIGINT"),
            DataType::Float16 | DataType::Float32 | DataType::Float64 => {
                dialect.number_type_name().unwrap_or("DOUBLE PRECISION")
            }
            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
                dialect.string_type_name().unwrap_or("VARCHAR")
            }
            DataType::Date32 | DataType::Date64 => dialect.date_type_name().unwrap_or("DATE"),
            DataType::Timestamp(_, _) => dialect.datetime_type_name().unwrap_or("TIMESTAMP"),
            DataType::Time32(_) | DataType::Time64(_) => dialect.time_type_name().unwrap_or("TIME"),
            other => {
                return Err(GgsqlError::ReaderError(format!(
                    "AdbcReader::register: unsupported Arrow type for column '{}': {:?}",
                    field.name(),
                    other
                )));
            }
        };
        cols.push(format!(
            "{} {}",
            crate::naming::quote_ident(field.name()),
            ty_name
        ));
    }

    Ok(format!(
        "CREATE TABLE {} ({})",
        crate::naming::quote_ident(name),
        cols.join(", ")
    ))
}

#[cfg(test)]
mod tests {
    use super::*;
    use adbc_datafusion::DataFusionDriver;

    /// Construct a reader over an in-process DataFusion ADBC driver.
    /// DataFusion starts empty; callers register tables via the reader's
    /// `register()` method (added in Task 4) or via raw SQL DDL.
    fn fixture_reader() -> AdbcReader<DataFusionDriver> {
        AdbcReader::from_driver(DataFusionDriver::new(None)).expect("datafusion init")
    }

    #[test]
    fn execute_sql_returns_scalar_result() {
        use crate::array_util::as_i64;
        let reader = fixture_reader();
        let df = reader
            .execute_sql("SELECT 1 AS one, 'hello' AS greeting")
            .expect("query ok");
        assert_eq!(df.height(), 1);
        assert_eq!(df.width(), 2);
        let one = as_i64(df.column("one").unwrap()).unwrap().value(0);
        assert_eq!(one, 1);
    }

    #[test]
    fn register_then_query_roundtrip() {
        use crate::array_util::as_i64;
        use crate::df;

        let reader = fixture_reader();
        let df = df! {
            "x" => vec![1i64, 2, 3],
            "y" => vec!["a", "b", "c"],
        }
        .unwrap();
        reader.register("t", df, false).expect("register ok");

        let out = reader
            .execute_sql("SELECT COUNT(*) AS n FROM t")
            .expect("count ok");
        let n = as_i64(out.column("n").unwrap()).unwrap().value(0);
        assert_eq!(n, 3);
    }

    #[test]
    fn unregister_removes_table() {
        use crate::df;

        let reader = fixture_reader();
        let df = df! { "x" => vec![1i64] }.unwrap();
        reader.register("tmp", df, false).unwrap();

        // First unregister should succeed: table was registered via this reader.
        reader.unregister("tmp").expect("unregister ok");

        // Second unregister must fail: the name was removed from
        // registered_tables, so the guard in unregister() triggers.
        // This verifies the bookkeeping without triggering the
        // adbc_datafusion 0.23 Statement::execute panic that happens on
        // `SELECT * FROM <dropped-table>` (the driver .unwrap()s a DataFusion
        // planning error at lib.rs:913 instead of returning a proper ADBC
        // error — captured in Task 9 findings).
        let err = reader.unregister("tmp").unwrap_err();
        assert!(matches!(err, GgsqlError::ReaderError(_)));
    }

    #[test]
    fn with_dialect_plumbs_custom_dialect_through() {
        // Dummy dialect that overrides a recognizable method so we can verify
        // the reader actually stored and exposes our dialect rather than the
        // default AnsiDialect.
        struct ShoutyDialect;
        impl super::SqlDialect for ShoutyDialect {
            fn integer_type_name(&self) -> Option<&str> {
                Some("SHOUTY_BIGINT")
            }
        }

        let reader = AdbcReader::with_dialect(DataFusionDriver::new(None), Box::new(ShoutyDialect))
            .expect("reader");

        // The Reader trait's dialect() accessor should return our ShoutyDialect.
        assert_eq!(reader.dialect().integer_type_name(), Some("SHOUTY_BIGINT"));
    }

    #[test]
    #[ignore = "ggsql's execute pipeline issues `CREATE OR REPLACE TEMP TABLE` for layer/stat \
                materialization, which adbc_datafusion 0.23 rejects with `NotImplemented(\"Temporary \
                tables not supported\")`. The full pipeline works against any driver that supports \
                TEMP TABLE (DuckDB, Trino, etc.) — see the equivalence tests for that path."]
    fn reader_executes_full_ggsql_visualise_query() {
        use crate::df;

        let reader = fixture_reader();
        let data = df! {
            "date"   => vec!["2024-01-01", "2024-01-02", "2024-01-03"],
            "value"  => vec![10i64, 20, 30],
            "region" => vec!["N", "S", "N"],
        }
        .unwrap();
        reader.register("sales", data, false).unwrap();

        let query = r#"
            SELECT date, value, region FROM sales WHERE value > 5
            VISUALISE date AS x, value AS y, region AS color
            DRAW line
        "#;
        let spec = reader.execute(query).expect("ggsql execute ok");
        let meta = spec.metadata();
        // Full pipeline verification: SQL executed (3 rows after WHERE),
        // VISUALISE parsed, plot resolved with 1 layer.
        assert_eq!(meta.rows, 3);
        assert_eq!(meta.layer_count, 1);
        // The `columns` list reports the *transformed aesthetic* column names
        // (e.g. x -> pos1, y -> pos2, color -> stroke on a line layer) not the
        // raw SQL column names. See `test_execute_metadata` in reader/mod.rs
        // for the same convention.
        assert!(
            meta.columns.iter().any(|c| c == "pos1"),
            "expected pos1 (x aesthetic) in columns: {:?}",
            meta.columns
        );
        assert!(
            meta.columns.iter().any(|c| c == "pos2"),
            "expected pos2 (y aesthetic) in columns: {:?}",
            meta.columns
        );
        assert!(
            meta.columns.iter().any(|c| c == "stroke"),
            "expected stroke (color aesthetic on line) in columns: {:?}",
            meta.columns
        );
    }

    #[test]
    fn execute_sql_handles_multi_batch_result() {
        use crate::array_util::as_i64;
        use crate::df;

        // Register a 50k-row frame. DataFusion's default batch size is typically
        // around 8k rows, so the result read-side should produce >1 RecordBatch
        // and exercise the `for batch in reader` loop.
        let reader = fixture_reader();
        let xs: Vec<i64> = (0..50_000i64).collect();
        let df = df! { "x" => xs }.unwrap();
        reader.register("big", df, false).expect("register ok");

        let out = reader
            .execute_sql("SELECT x FROM big ORDER BY x")
            .expect("query ok");
        assert_eq!(out.height(), 50_000);

        // Spot-check: first + last rows should round-trip correctly.
        let col = out.column("x").unwrap();
        let arr = as_i64(col).unwrap();
        assert_eq!(arr.value(0), 0);
        assert_eq!(arr.value(49_999), 49_999);
    }

    #[test]
    fn execute_sql_handles_nulls() {
        use crate::array_util::as_i64;
        use arrow::array::Array;

        let reader = fixture_reader();
        // Use DataFusion DDL to create a table with a NULL.
        reader
            .execute_sql("CREATE TABLE nulltest (x BIGINT) AS VALUES (1), (NULL), (3)")
            .expect("ddl ok");

        let out = reader
            .execute_sql("SELECT x FROM nulltest ORDER BY x NULLS LAST")
            .expect("query ok");
        assert_eq!(out.height(), 3);

        let col = out.column("x").unwrap();
        let arr = as_i64(col).unwrap();
        // Row 2 should be NULL in the returned DataFrame.
        assert!(arr.is_null(2));
        // Rows 0 and 1 are the non-null values.
        assert_eq!(arr.value(0), 1);
        assert_eq!(arr.value(1), 3);
    }

    #[test]
    #[ignore]
    fn bench_register_and_query_100k_rows() {
        use crate::array_util::as_i64;
        use crate::df;
        use std::time::Instant;

        let reader = fixture_reader();
        let n = 100_000i64;
        let xs: Vec<i64> = (0..n).collect();
        let df = df! { "x" => xs }.unwrap();

        let t0 = Instant::now();
        reader.register("big", df, false).unwrap();
        let reg_ms = t0.elapsed().as_millis();

        let t1 = Instant::now();
        let out = reader.execute_sql("SELECT COUNT(*) AS n FROM big").unwrap();
        let q_ms = t1.elapsed().as_millis();

        let n_out = as_i64(out.column("n").unwrap()).unwrap().value(0);
        assert_eq!(n_out, n);
        eprintln!("register 100k rows: {} ms | query: {} ms", reg_ms, q_ms);
    }

    /// Issue #12: `execute_sql` must hold `conn.borrow_mut()` only long enough
    /// to build + execute the Statement — the returned `RecordBatchReader` is
    /// `Box<dyn ... + 'static>`, so iteration must not require the statement
    /// or the connection borrow to stay alive.
    ///
    /// This mirrors the exact borrow pattern `execute_sql` uses post-fix:
    /// borrow, build+execute, drop the borrow, then iterate. It also kicks
    /// off a second `execute_sql` while the first stream is still alive —
    /// only possible if the first borrow was released.
    #[test]
    fn record_batch_reader_outlives_statement_and_allows_second_query() {
        use arrow::array::RecordBatchReader as _;

        let reader = fixture_reader();

        let stream = {
            // Use `try_borrow_mut` here to mirror `execute_sql`'s production
            // path — if this ever panics in the test, the fix in `execute_sql`
            // has regressed and the borrow scope has crept wider again.
            let mut conn = reader
                .connection
                .try_borrow_mut()
                .expect("fresh reader should allow a mutable borrow");
            let mut stmt = conn.new_statement().expect("new_statement");
            stmt.set_sql_query("SELECT 1 AS v UNION ALL SELECT 2 UNION ALL SELECT 3")
                .expect("set_sql_query");
            stmt.execute().expect("execute")
            // `stmt` and the `RefMut<Connection>` both drop here.
        };

        // With the borrow released, another query on the same reader must
        // work while `stream` is still live.
        let df2 = reader
            .execute_sql("SELECT 42 AS answer")
            .expect("second query");
        assert_eq!(df2.height(), 1);

        // `stream` must still iterate — it does not depend on `stmt` or the
        // original borrow. `schema()` is called before `collect()` consumes
        // the reader.
        let schema = stream.schema();
        let batches = stream
            .collect::<std::result::Result<Vec<_>, _>>()
            .expect("drain");
        let total: usize = batches.iter().map(|b| b.num_rows()).sum();
        assert_eq!(total, 3);
        assert_eq!(schema.fields().len(), 1);
        assert_eq!(schema.field(0).name(), "v");
    }

    #[test]
    fn execute_sql_handles_empty_result_with_schema() {
        let reader = fixture_reader();
        let df = reader
            .execute_sql("SELECT 1 AS a, 'x' AS b WHERE false")
            .expect("query ok");
        // The schema is preserved on zero-batch results: we now pull the
        // declared schema off the `RecordBatchReader` *before* draining
        // batches and hand it to the IPC bridge so an empty result still
        // produces a 0-row DataFrame with the correct columns.
        assert_eq!(df.height(), 0);
        assert_eq!(df.width(), 2);
        let names: Vec<String> = df
            .get_column_names()
            .iter()
            .map(|s| s.to_string())
            .collect();
        assert!(names.contains(&"a".to_string()));
        assert!(names.contains(&"b".to_string()));
    }
}

#[cfg(all(test, feature = "sqlite"))]
mod equivalence_tests {
    //! Equivalence tests: `AdbcReader<sqlite via ManagedDriver>` vs ggsql's
    //! `SqliteReader` on the same query against the same SQLite DB. Validates
    //! correctness of the ADBC abstraction's routing, type bridging, and
    //! ingest paths against a real, fully-functional ADBC driver.
    //!
    //! Skipped by default (gated `#[ignore]`). To run them:
    //!
    //! 1. Install dbc: `curl -LsSf https://dbc.columnar.tech/install.sh | sh`
    //! 2. Install the SQLite driver: `dbc install sqlite`
    //! 3. Run: `cargo test --features "adbc sqlite" -- --ignored equivalence`
    //!
    //! `dbc install` writes the driver to a manifest location that
    //! `ManagedDriver::load_from_name("sqlite", ...)` discovers automatically
    //! (on macOS: `~/Library/Application Support/ADBC/Drivers/sqlite.toml`).
    //!
    //! Why SQLite (and not DuckDB) as the equivalence oracle: `libduckdb` is
    //! distributed as a bundled-static archive, so it can't be loaded as the
    //! shared library that `ManagedDriver` requires. The Apache-published
    //! SQLite ADBC driver ships as `libadbc_driver_sqlite.dylib` and is the
    //! reference C-driver path for round-tripping through `adbc_driver_manager`.

    use crate::reader::sqlite::SqliteDialect;
    use crate::reader::{AdbcReader, Reader, SqliteReader};
    use adbc_core::options::{AdbcVersion, OptionDatabase, OptionValue};
    use adbc_core::LOAD_FLAG_DEFAULT;
    use adbc_driver_manager::ManagedDriver;
    use tempfile::NamedTempFile;

    /// Construct an `AdbcReader` pointed at a specific SQLite file.
    /// Both readers in each test point at the SAME file so equivalence is
    /// over the same physical database.
    fn make_adbc_reader(db_path: &str) -> AdbcReader<ManagedDriver> {
        let driver = ManagedDriver::load_from_name(
            "sqlite",
            None,
            AdbcVersion::V110,
            LOAD_FLAG_DEFAULT,
            None,
        )
        .expect("`dbc install sqlite` first; see module docs");
        let dialect: Box<dyn crate::reader::SqlDialect + Send> = Box::new(SqliteDialect);
        AdbcReader::new_with_database_opts(
            driver,
            dialect,
            std::iter::once((
                OptionDatabase::Uri,
                OptionValue::String(format!("file:{}", db_path)),
            )),
        )
        .expect("construct AdbcReader<sqlite>")
    }

    fn make_sqlite_reader(db_path: &str) -> SqliteReader {
        SqliteReader::from_connection_string(&format!("sqlite://{}", db_path))
            .expect("SqliteReader at the same path")
    }

    /// Compare two DataFrames by schema (field names + types) and by
    /// per-column Arrow array contents. We don't use a blanket
    /// `assert_eq!(df, df)` because `DataFrame` doesn't implement `PartialEq`;
    /// going through schema + per-column equality is also more diagnostic
    /// when one of them diverges.
    fn assert_dataframes_equal(
        adbc_df: &crate::DataFrame,
        sqlite_df: &crate::DataFrame,
        ctx: &str,
    ) {
        let adbc_schema = adbc_df.schema();
        let sqlite_schema = sqlite_df.schema();
        assert_eq!(
            adbc_schema.fields().len(),
            sqlite_schema.fields().len(),
            "{}: column count mismatch (adbc={}, sqlite={})",
            ctx,
            adbc_schema.fields().len(),
            sqlite_schema.fields().len(),
        );
        for (i, (a, s)) in adbc_schema
            .fields()
            .iter()
            .zip(sqlite_schema.fields().iter())
            .enumerate()
        {
            assert_eq!(
                a.name(),
                s.name(),
                "{}: column {} name mismatch (adbc='{}', sqlite='{}')",
                ctx,
                i,
                a.name(),
                s.name(),
            );
            assert_eq!(
                a.data_type(),
                s.data_type(),
                "{}: column '{}' type mismatch (adbc={:?}, sqlite={:?})",
                ctx,
                a.name(),
                a.data_type(),
                s.data_type(),
            );
        }
        assert_eq!(
            adbc_df.height(),
            sqlite_df.height(),
            "{}: row count mismatch (adbc={}, sqlite={})",
            ctx,
            adbc_df.height(),
            sqlite_df.height(),
        );
        for field in adbc_schema.fields() {
            let a = adbc_df.column(field.name()).unwrap();
            let s = sqlite_df.column(field.name()).unwrap();
            assert_eq!(
                a.as_ref(),
                s.as_ref(),
                "{}: column '{}' data mismatch",
                ctx,
                field.name(),
            );
        }
    }

    #[test]
    #[ignore = "requires `dbc install sqlite`; see module docs"]
    fn equiv_simple_select() {
        let db = NamedTempFile::new().unwrap();
        let db_path = db.path().to_str().unwrap();
        let adbc = make_adbc_reader(db_path);
        let direct = make_sqlite_reader(db_path);
        let sql = "SELECT 1 AS x, 'hello' AS y, 3.14 AS z";
        let a = adbc.execute_sql(sql).unwrap();
        let d = direct.execute_sql(sql).unwrap();
        assert_dataframes_equal(&a, &d, "simple select");
    }

    #[test]
    #[ignore = "requires `dbc install sqlite`; see module docs"]
    fn equiv_register_and_query() {
        // Register through the ADBC reader (exercises the standard ADBC
        // bulk-ingest path), then read back through SqliteReader (talks to
        // rusqlite directly against the same file) AND through the ADBC
        // reader. Both should agree.
        let db = NamedTempFile::new().unwrap();
        let db_path = db.path().to_str().unwrap();
        let adbc = make_adbc_reader(db_path);
        let df = crate::df! {
            "x" => vec![1i64, 2, 3, 4, 5],
            "y" => vec![10i64, 20, 30, 40, 50],
        }
        .unwrap();
        adbc.register("t", df, false).unwrap();

        // Open the SqliteReader AFTER the ADBC reader has CREATEd + ingested,
        // so its `Connection::open` sees the on-disk schema written by ADBC.
        let direct = make_sqlite_reader(db_path);

        let sql = "SELECT x, y, x*y AS xy FROM t WHERE y > 15 ORDER BY x";
        let a = adbc.execute_sql(sql).unwrap();
        let d = direct.execute_sql(sql).unwrap();
        assert_dataframes_equal(&a, &d, "register + filter + projection");
    }

    #[test]
    #[ignore = "requires `dbc install sqlite`; see module docs"]
    fn equiv_nulls() {
        // Mix nulls with typed values so both readers infer the same type.
        // (SqliteReader's per-row type inference falls back to Utf8 when a
        // column is *exclusively* NULL, while ADBC carries through the
        // declared INTEGER from the projection metadata. That's a
        // SqliteReader limitation, not an AdbcReader bug, so we steer
        // around it here — see the divergence note in the PR description.)
        let db = NamedTempFile::new().unwrap();
        let db_path = db.path().to_str().unwrap();
        let adbc = make_adbc_reader(db_path);
        let direct = make_sqlite_reader(db_path);
        // SQLite doesn't accept `VALUES (..) AS t(col, ...)` column-list
        // aliases, so build the source rows with UNION ALL — both readers
        // handle this identically.
        let sql = "SELECT i, s FROM ( \
                SELECT CAST(1 AS INTEGER) AS i, CAST('a' AS TEXT) AS s \
                UNION ALL SELECT NULL, 'b' \
                UNION ALL SELECT 3, NULL \
            ) ORDER BY i";
        let a = adbc.execute_sql(sql).unwrap();
        let d = direct.execute_sql(sql).unwrap();
        assert_dataframes_equal(&a, &d, "mixed null + typed values");
    }
}