mini-sqlite-dump 0.1.2

creating sqlite3 dump files from Rust
Documentation
#![deny(missing_docs)]
#![doc=include_str!("../README.md")]

use std::collections::{HashMap, VecDeque};
use std::fmt::{self, Display, Write as _};
use std::io;
use std::iter;
use std::mem;
use std::ops;
use std::str;

use anyhow::{anyhow, Context as _};
use derive_deftly::{Deftly, define_derive_deftly};
use easy_ext::ext;
use hex_fmt::HexFmt;
use itertools::{chain, izip, Itertools};
use rusqlite::{Transaction, types::ValueRef};
use thiserror::Error;

mod text;
use text::write_text;
mod real;
use real::write_real;

#[cfg(test)]
mod test;

/// **Writer of sqlite3 data - toplevel entrypoint**
///
/// `W` should be buffered.
/// After an error, it is not safe to continue to write to `W`
/// (with these facilities, or any other):
/// partial data may have been written,
/// so the output should then be treated as corrupted and useless.
///
/// To use this:
///
///  * Call [`Archiver::start()`] and record the `table_names`.
///  * For each table, call [`start_table`](Archiver::start_table),
///    get the rows you want out with a normal database query,
///    and call [`TableArchiver::write_row`] on each one.
///  * At the end, call [`finish`](Archiver::finish).
pub struct Archiver<W> {
    w: W,
    tables: VecDeque<TableInfo>
}

struct TableInfo {
    name: String,
    cols: Vec<String>,
}

/// Handle for writing individual rows with an [`Archiver`].
pub struct TableArchiver<'a, W> {
    a: &'a mut Archiver<W>,
    t: TableInfo,
}

/// Error found while writing a database dump
#[derive(Error, Debug)]
pub enum Error {
    /// IO error on the writer `W`
    #[error("{0}")]
    Io(#[from] io::Error),

    /// Database contains something unsupported by `mini-sqlite-dump`
    #[error("lack of support for these database contents: {0:#}")]
    Unsupported(anyhow::Error),

    /// Database operation unexpectedly failed
    ///
    /// This could mean that the caller is driving `mini-sqlite-dump` wrongly.
    #[error("database operation failed: {0:#}")]
    Db(anyhow::Error),

    /// Internal error in `mini-sqlite-dump` (bug)
    #[error("internal error: {0:#}")]
    Internal(anyhow::Error),
}

type E = Error;

impl<W: io::Write> Archiver<W> {
    /// Start writing a dump.
    ///
    /// Will enumerate the tables found in the database,
    /// and pass their names to `table_names`.
    ///
    /// This can be used to enumerate over all tables;
    /// or they can be ignored if only certain tables need to be dumped.
    ///
    /// (The schema for every existing table will be dumped, unconditionally;
    /// there is not currently a way to control this.)
    pub fn start<S: Into<String>>(
        dbt: &Transaction,
        mut w: W,
        table_names: impl IntoIterator<Item = S>,
    ) -> Result<Self, Error> {
        let mut tables = VecDeque::new();

        write!(w, include_str!("header.sql"))?;

        let user_version: i64  = dbt.query_row(
            r#" PRAGMA user_version "#, [],
            |row| row.get(0)
        )
            .context("execute user_version access pragma").map_err(E::Db)?;

        write!(w, "-- PRAGMA user_version = {user_version};\n")?;

        let encoding: String  = dbt.query_row(
            r#" PRAGMA encoding "#, [],
            |row| row.get(0)
        )
            .context("execute encoding access pragma").map_err(E::Db)?;

        const EXPECTED_ENCODING: &str = "UTF-8";
        if &encoding != EXPECTED_ENCODING {
            return Err(E::Unsupported(anyhow!(
 "database encoding is {encoding:?}, only {EXPECTED_ENCODING:?} is supported"
            )));
        }

        let mut schema_stmt = dbt.prepare(
            r#" SELECT sql FROM 'SQLITE_SCHEMA'
                 WHERE type = 'table' AND name = ? "#
        ).context("prepare schema access query").map_err(E::Db)?;

        for name in table_names {
            let name: String = name.into();
            
            let sql: String = schema_stmt.query_row(
                [&name],
                |row| Ok(row.get(0)),
            )
                .context("execute schema access query").map_err(E::Db)?
                .context("obtain schema text from row").map_err(E::Db)?;

            write!(w, "{};\n", sql)?;

            let pragma = format!(r#" PRAGMA table_xinfo('{name}') "#);

            let mut cols_stmt = dbt.prepare({
                assert!(! name.contains(|c| c=='\'' || c=='\0'));
                &pragma
            }).context("prepare PRAGMA table_inf query").map_err(E::Db)?;

            let cols = cols_stmt.query([])
                .context("execute PRAGMA table_xinfo").map_err(E::Db)?
                .mapped(|row| row.get("name"))
                .collect::<Result<Vec<String>, _>>()
                .context("read/convert PRAGMA table_xinfo rows")
                    .map_err(E::Db)?;

            tables.push_back(TableInfo {
                name,
                cols,
            });
        }

        let self_ = Archiver {
            w,
            tables,
        };
        Ok(self_)
    }

    /// Start writing a dump of a particular table.
    pub fn start_table(&mut self, name: &str)
                       -> Result<TableArchiver<'_, W>, E>
    {
        let t = self.tables.pop_front()
            .ok_or_else(|| internal_error(
                anyhow!("start_table called too many times")
            ))?;

        if t.name != name {
            return Err(internal_error(anyhow!(
                "expected start_table({}), got start_table({name})",
                t.name,
            )));
        }
        
        Ok(TableArchiver {
            a: self,
            t,
        })
    }

    /// Finish writing the dump.
    ///
    /// The writer `W` will be flushed and then dropped.
    pub fn finish(self) -> Result<(), E> {
        self.finish_with_writer()?;
        Ok(())
    }

    /// Finish writing the dump, returning the writer.
    ///
    /// The writer `W` will be flushed.
    pub fn finish_with_writer(mut self) -> Result<W, E> {
        if ! self.tables.is_empty() {
            let e = anyhow!(
                "tables unprocessed at finish! {:?}",
                self.tables.iter().map(|ti| &ti.name).collect_vec()
            );
            return Err(internal_error(e));
        }

        write!(self.w, "COMMIT;\n")?;
        self.w.flush()?;
        Ok(self.w)
    }

    /// Access the inner writer
    ///
    /// Take care!  Using this to write will probably make data corruption.
    pub fn writer_mut(&mut self) -> &mut W {
        &mut self.w
    }
}

/// Row data, that can be archived
pub trait RowLike {
    /// Get an individual data value, by its field name
    fn get_by_name(&self, n: &str) -> rusqlite::Result<ValueRef<'_>>;

    /// Check that the supplied data has at most `l` fields
    ///
    /// If `self` has more than `l` fields, returns an error.
    /// If it has no more than `l`, returns `Ok(())`.
    ///
    /// Used by [`TableArchiver::write_row`]
    /// to check that it has really archived all the data in the row.
    fn check_max_len(&self, l: usize) -> anyhow::Result<()>;
}

impl RowLike for rusqlite::Row<'_> {
    fn get_by_name(&self, n: &str) -> rusqlite::Result<ValueRef<'_>> {
        self.get_ref(n)
    }
    fn check_max_len(&self, l: usize) -> anyhow::Result<()> {
        match self.get_ref(l) {
            Err(rusqlite::Error::InvalidColumnIndex { .. }) => Ok(()),
            Err(other) => Err(
                anyhow::Error::from(other) // we have row already, so
                                           // not deadlock/timeout
                .context(
                    "get out of range column failed in an unexpected way!"
                )),
            Ok(_) => Err(anyhow!(
                "get out of range column succeeded!"
            )),
        }
    }
}

impl RowLike for HashMap<&str, ValueRef<'_>> {
    fn get_by_name(&self, n: &str) -> rusqlite::Result<ValueRef<'_>> {
        self.get(n)
            .copied()
            .ok_or_else(|| rusqlite::Error::InvalidColumnName(n.into()))
    }
    fn check_max_len(&self, l: usize) -> anyhow::Result<()> {
        if self.len() <= l {
            Ok(())
        } else {
            Err(anyhow!("row has {} rows, expected at most {l}", self.len()))
        }
    }
}

impl<W: io::Write> TableArchiver<'_, W> {
    /// Write a single row.
    ///
    /// The row can be a `Row` (for example, returned from a query),
    /// a `HashMap`, or something else implementing `RowLike`.
    ///
    /// The fields in `row` must match those in the actual table.
    pub fn write_row(
        &mut self,
        row: &impl RowLike,
    ) -> Result<(), Error> {
        let mut w = &mut self.a.w;
        let t = &self.t;
        write!(w, "INSERT INTO {} VALUES (", t.name)?;

        row.check_max_len(t.cols.len()).map_err(internal_error)?;
            
        for (delim, col) in izip!(
            chain!([""], iter::repeat(",")),
            &t.cols,
        ) {
            write!(w, "{delim}")?;
            let v = row.get_by_name(col)
                .with_context(|| format!("table {:?}", t.name))
                .context("fetch data row")
                .map_err(E::Db)?;

            write_value(&mut w, v)?;
        }

        write!(w, ");\n")?;

        Ok(())
    }

    /// Access the inner writer
    ///
    /// Take care!  Using this to write will probably make data corruption.
    pub fn writer_mut(&mut self) -> &mut W {
        &mut self.a.w
    }
}

/// Dump a single `rusqlite::ValueRef` in textual format.
///
/// The output syntax is a sqlite3 value expression, in UTF-8.
///
/// This utility method is exposed for completeness;
/// callers using [`Archiver`] do not need it.
pub fn write_value(mut w: impl io::Write, v: ValueRef<'_>) -> Result<(), E> {
    use ValueRef as V;
    match v {
        V::Null => write!(w, "NULL")?,
        V::Integer(i) => write!(w, "{i}")?,
        V::Real(v) => write_real(w, v)?,
        V::Blob(b) => write!(w, "x'{}'", HexFmt(b))?,
        V::Text(t) => write_text(w, t)?,
    };
    Ok(())
}

fn internal_error(ae: anyhow::Error) -> E {
    Error::Internal(ae)
}