mini-sqlite-dump 0.1.2

creating sqlite3 dump files from Rust
Documentation

use super::*;

use std::path::Path;
use std::process::Command;

use rusqlite::Connection;
use test_temp_dir::test_temp_dir;
use testresult::TestResult;

#[derive(Debug, PartialEq)]
enum ForComparison<'v> {
    Real(u64),
    Other(ValueRef<'v>),
}

impl<'v> ForComparison<'v> {
    fn real(f: f64) -> Self {
        ForComparison::Real(f.to_bits())
    }
}

/// Faithful conversion (see also [`sqlite_mangle_value`])
impl<'v> From<ValueRef<'v>> for ForComparison<'v> {
    fn from(v: ValueRef<'v>) -> Self {
        match v {
            ValueRef::Real(v) => ForComparison::real(v),
            other => ForComparison::Other(other),
        }
    }
}

/// Mangle a `ValueRef` the way sqlite3 does when storing in a table
///
/// Empirically,
///  * `SELECT {real::write_real(...)}`
///     doesn't always give you what you started with.
///  * `SELECT hex(ieee754_to_blob(ieee754_from_blob(x'{8 digits}')));`
///     doesn't always give you what you started with.
///  * Inserting a value into a table can modify it even further:
///    Negative zeroes are made positive.
///
/// This function simulates what happens if you insert into a table,
/// and then get it out again.
fn mangle_like_sqlite(v: ValueRef) -> ForComparison {
    let f = match v {
        ValueRef::Real(v) => v,
        other => return ForComparison::Other(other),
    };

    if f.is_nan() {
        // sqlite converts all NaNs to NULL, everywhere
        ForComparison::Other(ValueRef::Null)
    } else if f == 0.0 {
        // sqlite converts -0 to 0, but only in tables
        ForComparison::real(f.abs())
    } else if f.is_normal() || f.is_subnormal() || f.is_infinite() {
        ForComparison::real(f)
    } else {
        panic!("unhandled case {f:?}")
    }
}

fn execute_cli_batch(db: &Path, dump: &str) -> TestResult<()> {
    let mut cmd = Command::new("sqlite3");
    cmd.args(["-batch"]);
    cmd.arg(&db);
    cmd.args([format!("\n{}", &dump)]);
    let st = cmd.status()?;
    assert!(st.success(), "{st}");
    Ok(())
}

fn with_readout<T>(
    conn: &mut Connection,
    f: impl FnOnce(
        &rusqlite::Transaction<'_>,
        rusqlite::Rows<'_>,
    ) -> TestResult<T>,
) -> TestResult<T> {
    let dbt = conn.transaction()?;

    let readout_stmt = r#" SELECT * FROM t ORDER BY seq ASC; "#;
    let mut readout = dbt.prepare(readout_stmt)?;
    let readout = readout.query([])?;

    f(&dbt, readout)
}

fn readout_rows(
    conn: &mut Connection,
    mut f: impl FnMut(&rusqlite::Row) -> TestResult<()>,
) -> TestResult<String> {
    with_readout(conn, |dbt, mut readout| {

        let mut dump = Vec::<u8>::new();
        let mut archiver = Archiver::start(&dbt, &mut dump, ['t'])?;
        let mut tarchiver = archiver.start_table("t")?;

        while let Some(read_row) = readout.next()? {
            tarchiver.write_row(read_row)?;
            f(read_row)?;
        }

        archiver.finish()?;

        let dump = String::from_utf8(dump)?;

        Ok(dump)
    })
}

/// Test strategy
///
///  0. Generate a bunch of test data in `HashMap`s
///    and use a dummy schema with `Archiver` to generate `dump0`
///
///  1. Execute `dump0` with `sqlite3(1)` to make disk db `db1`.
///     (NB that the library version may not work because the
///     `ieee754_from_blob` function is in an extension, which
///     doesn't seem to be in the Debian package
///     and certainly isn't loaded by `rusqlite` in these tests IME.
///     But it is in the CLI.
///
///     Read the data out of `db1` with rusqlite,
///     compare it to what we wrote, and generate a new dump.
///
///  2. Execute the 2nd dump to create a 2nd db.
///     Read the data out of both dbs and compare them
///     (this time, insisting that there is no mangling).
///     Generate a third dump file.
///
///     Check that the 2nd and 3rd dump files are the same.
#[test]
fn roundtrip() -> TestResult<()>  { test_temp_dir!().used_by(|temp_dir| {

    // Phase 0

    let mut conn0 = Connection::open_in_memory()?;

    conn0.execute_batch(r#"
        CREATE TABLE t (
            seq  INTEGER NOT NULL,
            i    INTEGER,
            f    REAL,
            t    TEXT,
            b    BLOB
        );
    "#)?;

    let integers = chain!(
        [ i64::MIN, -1, 0, 1, i64::MAX ].map(ValueRef::Integer),
        [ ValueRef::Null ],
    ).collect_vec();

    let blocks = [b"", &[0][..], b"a", "één".as_bytes(), &[0xff]];
    let mk_texts = |v: fn(_) -> ValueRef<'static>| {
        blocks.into_iter().map(v).collect_vec()
    };
    let texts = mk_texts(ValueRef::Text);
    let blobs = mk_texts(ValueRef::Blob);
    let reals = real::interesting_f64s().into_iter()
        .map(ValueRef::Real).collect_vec();

    let generators = [
        &integers,
        &reals,
        &*texts,
        &*blobs,
    ];

    let rows = || {
        let n_rows = generators.iter()
            .map(|g| g.len())
            .max().expect("nonempty");

        let mut seqs = (0..).map(ValueRef::Integer);
        let mut generators = generators.iter().map(
            |g| g.iter().copied().cycle()
        ).collect_vec();
        iter::from_fn(move || Some(
            chain!(
                [seqs.next().expect("cycle")],
                generators.iter_mut().map(|g| g.next().expect("cycle")),
            )
                .collect::<Vec<ValueRef>>()
        )).take(n_rows)
    };

    let dbt0 = conn0.transaction()?;

    let mut dump0 = Vec::<u8>::new();
    let mut archiver = Archiver::start(&dbt0, &mut dump0, ["t"])?;
    let mut tarchiver = archiver.start_table("t")?;

    let columns = "seq i f t b".split_ascii_whitespace();

    for row in rows() {
        let row = izip!(
            columns.clone(),
            row,
        ).collect::<HashMap<_, _>>();

        tarchiver.write_row(&row)?;
    }

    archiver.finish()?;

    drop(dbt0);
    drop(conn0);

    let dump0 = String::from_utf8(dump0)?;

    println!("----------\n{dump0}----------");

    // Phase 1

    let db1 = temp_dir.join("1.db");
    execute_cli_batch(&db1, &dump0)?;

    let mut conn1 = Connection::open(&db1)?;
    let mut input_rows = rows().enumerate();

    let dump1 = readout_rows(&mut conn1, |read_row| {

        let (seq, input_row) = input_rows.next()
            .expect("reloaded is longer!");

        println!("checking row {seq}");

        for (i, n) in columns.clone().enumerate() {
            let input_col = input_row[i];
            let compar_col = mangle_like_sqlite(input_col);
            let read_col = ForComparison::from(read_row.get_ref(n)?);

            println!("    {n} {input_col:x?} {compar_col:x?} {read_col:x?}");

            assert_eq!(compar_col, read_col);
        }

        Ok(())

    })?;

    assert!( input_rows.next().is_none() );

    println!("----------\n{dump1}----------");

    // Phase 2

    let db2 = temp_dir.join("2.db");
    execute_cli_batch(&db2, &dump1)?;

    let mut conn2 = Connection::open(&db2)?;

    let dump2 = with_readout(&mut conn1, |_dbt1, mut readout1| {
        let dump2 = readout_rows(&mut conn2, |row2| {

            let row1 = readout1.next()?.expect("readout1 shorter!");

            for n in columns.clone() {
                let [col1, col2] = [row1, row2]
                    .map(|row| row.get_ref(n).expect(n))
                    .map(ForComparison::from);
                assert_eq!(col1, col2);
            }

            Ok(())
        })?;

        assert!( readout1.next()?.is_none() );

        Ok(dump2)
    })?;

    assert_eq!(dump1, dump2);

    TestResult::Ok(())

}).into_untracked() }