// mini-sqlite-dump 0.1.2
//
// Creating sqlite3 dump files from Rust.
// Documentation

use super::*;

/// One `D` for each of unq and hex
///
/// `tactic_is` should be a marker unit saying what the tactic means.
///
/// Entries are selected with a `Tactic<M>` carrying the same marker `M`.
/// The `ByTactic` derive-deftly template (applied below via
/// `#[derive_deftly(ByTactic)]`) generates the constructor, mapping,
/// iteration and indexing helpers from the fields of type `D`.
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)]
#[derive(Deftly)]
#[derive_deftly(ByTactic)]
struct ByTactic<M, D> {
    /// Marker recording which meaning of "tactic" this table is keyed by.
    tactic_is: M,
    /// Entry for the unq tactic (`Tactic::Unq`).
    unq: D,
    /// Entry for the hex tactic (`Tactic::Hex`).
    hex: D,
}

/// Marker: the tactic in question is the one used for the data
/// that has already been encoded (as in `FragmentWriter::enc`).
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
struct PreviousTactic;

/// Marker: the tactic in question is the one the output will be in
/// after the current item has been appended.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
struct ResultingTactic;

/// `N` static string pieces which are written out one after another.
///
/// Can be built from a `[&'static str; N]` via `From`/`.into()`.
#[derive(Copy, Clone, Debug, derive_more::From)]
struct Append<const N: usize>([&'static str; N]);

impl<const N: usize> Append<N> {
    fn nothing() -> Self {
        Append([""; N])
    }
    fn len(&self) -> usize {
        self.0.iter().map(|s| s.len()).sum()
    }
}
/// Displays as the pieces, each formatted in turn with the same formatter.
impl<const N: usize> Display for Append<N> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Go through `Display::fmt` (not `write_str`) so any formatter
        // flags are applied to each piece individually.
        self.0.iter().try_for_each(|piece| Display::fmt(piece, f))
    }
}

/// Selects one of the two ways of encoding text.
///
/// `M` is a marker value saying what the selection means; the variants
/// correspond to the `unq` and `hex` fields of `ByTactic`.
///
/// The derived ordering puts `Unq` before `Hex`.
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)]
enum Tactic<M> {
    /// `'...'` (and `CHAR()`)
    Unq(M),
    /// `CAST(x'...' AS TEXT)`
    ///
    /// Occurrences of fragments of this syntax are marked `//CAST`
    Hex(M),
}

/// Syntactic context (outside hex)
///
/// Describes where the unq encoding currently stands, which determines
/// what glue text is needed before the next item can be written.
#[derive(Clone, Copy, Debug)]
enum UnqState {
    /// Inside `'...'`
    InSquote,
    /// Before an expression, so "'...'" would be legal, as would `CHAR(..)`.
    ///
    /// This is the state of a freshly created `FragmentWriter`.
    BeforeExpr,
    /// After an expression, so `||` will be needed
    AfterExpr,
}

/// How an unq item (`unq_append`) is to be represented
#[derive(Clone, Copy, Debug)]
enum UnqRepr {
    /// `unq_append` is text to place within `'...'`
    InSquote,
    /// `unq_append` is an expression (`CHAR(..)`)
    IsExpr,
}

/// Writes text fragments to `w`, keeping two candidate encodings
/// (unq and hex) of the data not yet written out.
///
/// Best way to write previous data, ending in unq state, is:
///
///  * Write `enc.unq`
///  * Now the stream is in state `unq_state`
///
/// Best way to write previous data, ending in hex state, is:
///
///  * Write `enc.hex`
///  * Now the stream ends `CAST(x'....` // CAST
pub struct FragmentWriter<W> {
    /// Destination for the encoded output.
    w: W,
    /// Pending (not yet written) output, one buffer per tactic.
    enc: ByTactic<PreviousTactic, String>,
    /// Syntactic state reached at the end of `enc.unq`.
    unq_state: UnqState,
}

/// Table of candidate glue texts: the outer index is the resulting
/// tactic, the inner index is the previous tactic.
///
/// Each value means we can write the thing by:
///
///  * Writing the `self.enc` data for the [`PreviousTactic`]
///  * Writing the `Append` in here
///  * And then the output state will be in [`ResultingTactic`]
type AllProposals<const N: usize> = ByTactic<
    ResultingTactic,
    ByTactic<PreviousTactic, Append<N>>
>;

impl<W: io::Write> FragmentWriter<W> {
    pub fn write_fragment(&mut self, frag: TextFragment) -> Result<(), E> {
        use TextFragment as T;

        match frag {
            T::Nice(s) => self.repr_can_unq(
                s,
                UnqRepr::InSquote,
                s,
            ),
            T::Squote(s) => self.repr_can_unq(
                "''",
                UnqRepr::InSquote,
                s,
            ),
            T::Char(c, s) => self.repr_can_unq(
                format_args!("CHAR({})", c as u32),
                UnqRepr::IsExpr,
                s,
            ),
            T::Bad(b) => self.repr_must_hex(
                b,
            ),
        }
    }

    fn unq_prep_expr(
        &self,
    ) -> &'static str {
        use UnqState as U;
        match self.unq_state {
            U::InSquote   => "'||",
            U::AfterExpr  =>  "||",
            U::BeforeExpr =>    "",
        }
    }

    /// Add an item which can be represented either in unq or in hex form.
    ///
    ///  * `unq_append`: what to add to the unq encoding for this item
    ///  * `unq_repr`: whether `unq_append` belongs inside `'...'`
    ///    or is an expression of its own
    ///  * `bytes`: the item's text, which is added (hex-formatted)
    ///    to the hex encoding
    ///
    /// For each resulting tactic, the cheapest previous tactic (by total
    /// pending length plus glue) is determined.  If both resulting tactics
    /// agree on the previous tactic, that pending buffer is written to
    /// `self.w` and both buffers restart from the glue text; otherwise each
    /// buffer just continues in its own tactic.
    ///
    /// Returns an error only if writing to `self.w` fails.
    fn repr_can_unq(
        &mut self,
        unq_append: impl Display,
        unq_repr: UnqRepr,
        bytes: &str,
    ) -> Result<(), E> {
        use UnqState as U;
        use UnqRepr as R;

        // The unq state we will be in once `unq_append` has been added.
        let unq_state_after = match unq_repr {
            R::InSquote => U::InSquote,
            R::IsExpr   => U::AfterExpr,
        };

        // Glue text for every (resulting tactic, previous tactic) pair.
        // Outer field = resulting tactic; inner field = previous tactic.
        let all_proposals: AllProposals<2> = ByTactic {
            tactic_is: ResultingTactic,
            unq: ByTactic {
                tactic_is: PreviousTactic,
                // hex -> unq: close the hex literal, then open a quote
                // if the item needs to be inside one.
                hex: [
                        "' AS TEXT)||", //CAST
                        match unq_repr {
                            R::InSquote  => "'",
                            R::IsExpr    => "",
                        },
                    ].into(),
                // unq -> unq: nothing needed if we are already inside a
                // quote and want to stay there; otherwise get to the start
                // of an expression and open a quote if required.
                unq: match (self.unq_state, unq_repr) {
                    (U::InSquote, R::InSquote) => Append::nothing(),
                    _ => [
                        self.unq_prep_expr(),
                        match unq_repr {
                            R::InSquote =>  "'",
                            R::IsExpr   =>   ""
                        }
                    ].into(),
                },
            },
            hex: ByTactic {
                tactic_is: PreviousTactic,
                // hex -> hex: just keep adding hex digits.
                hex: Append::nothing(),
                // unq -> hex: get to the start of an expression,
                // then open a hex literal.
                unq: [
                    self.unq_prep_expr(),
                    "CAST(x'", // CAST
                ].into(),
            },
        };

        /// Each value means that:
        ///
        ///  * The best way to end up in [`ResultingTactic`]
        ///  * Is to select the entry from `all_proposals`
        ///  * Whose [`PreviousTactic`] is the value, `Tactic`
        type BestPrevTactics = ByTactic<
            ResultingTactic,
            Tactic<PreviousTactic>,
        >;

        let best_prev_tactics: BestPrevTactics =
            all_proposals.as_ref().map(
                |for_result_tactic| self.best_prev_tactic(for_result_tactic)
            );

        // `Ok` iff both resulting tactics prefer the same previous tactic.
        if let Ok(use_prev_tactic) = {
            best_prev_tactics
                .iter().map(|(_resulting, use_prev)| use_prev)
                .dedup().exactly_one()
        } {
            // No matter what happens next, we want `use_prev_tactic`

            // Flush the previously encoded buffer
            write!(self.w, "{}", &self.enc[use_prev_tactic])?;
            for (_, e) in self.enc.as_mut().iter() {
                e.clear();
            }

            // Write the Append, as specified
            self.append_proposals(
                all_proposals,
                ByTactic::from_fn(
                    ResultingTactic,
                    |_| use_prev_tactic,
                ),
            );
        } else {
            // There is no one best tactic.  For each resulting tactic,
            // just use the same previous tactic.  Ie, don't switch tactics.
            // (This code structure doesn't prove that this is optimal,
            // but with two tactics the only way this could be suboptimal
            // is if the bests are to *swap* tactics, which is obviously
            // going to be false given that the real syntax involves more
            // output for switching states than for staying in the same state.)

            self.append_proposals(
                all_proposals,
                ByTactic::from_fn(
                    ResultingTactic,
                    |resulting_tactic| resulting_tactic.change_meaning(
                        // For each resulting tactic, we use the previous one
                        |ResultingTactic| PreviousTactic
                    ),
                ),
            );
        }

        // Both buffers now end with the right glue; add the item itself.
        write!(self.enc.hex, "{}", HexFmt(bytes)).expect_write();
        // Now self.*hex* is correct

        write!(self.enc.unq, "{unq_append}").expect_write();
        self.unq_state = unq_state_after;
        // Now self.*unq* is correct

        Ok(())
    }

    /// Pick the previous tactic giving the shortest total output.
    ///
    /// The cost of a previous tactic is the length of its pending buffer
    /// in `self.enc` plus the length of its entry in `proposals`.  Equal
    /// costs are resolved by the ordering of `Tactic` itself.
    fn best_prev_tactic<const N: usize>(
        &self,
        proposals: &ByTactic<PreviousTactic, Append<N>>,
    ) -> Tactic<PreviousTactic> {
        let total_len = |prev: Tactic<PreviousTactic>, append: &Append<N>| {
            self.enc[prev].len() + append.len()
        };

        let (best, _append) = proposals
            .as_ref()
            .iter()
            .min_by_key(|&(prev, append)| (total_len(prev, append), prev))
            .expect("empty iter!");

        best
    }

    /// Append the selected proposals to the corresponding encoded outputs
    ///
    /// Called only within `repr_can_unq`
    /// Extend each pending buffer with its selected proposal.
    ///
    /// For every buffer in `self.enc`, looks up which previous tactic was
    /// selected (in `use_prev_tactic`) for that buffer's tactic, and
    /// appends the matching entry of `all_proposals` to the buffer.
    ///
    /// Called only within `repr_can_unq`
    fn append_proposals<const N: usize>(
        &mut self,
        all_proposals: AllProposals<N>,
        use_prev_tactic: ByTactic<ResultingTactic, Tactic<PreviousTactic>>,
    ) {
        for (slot, buf) in self.enc.as_mut().iter() {
            // `self.enc` is keyed by PreviousTactic, but what we are
            // producing now is the data for the resulting tactic (which
            // becomes the previous one next time round).  So re-label the
            // key in order to index the ResultingTactic-keyed tables.
            let resulting = slot.change_meaning(|PreviousTactic| ResultingTactic);
            let chosen_prev = use_prev_tactic[resulting];
            write!(buf, "{}", all_proposals[resulting][chosen_prev]).expect_write();
        }
    }

    /// Add an item for which only the hex representation is used.
    ///
    /// Writes the pending hex encoding followed by `bytes` (hex-formatted)
    /// to `self.w`, leaving the hex buffer empty.  The unq buffer is then
    /// reset to the text that closes the hex literal.
    ///
    /// Returns an error only if writing to `self.w` fails.
    fn repr_must_hex(
        &mut self,
        bytes: &[u8],
    ) -> Result<(), E> {
        // The pending unq encoding is of no use here and is not written.
        let pending_hex = mem::take(&mut self.enc.hex);
        write!(self.w, "{pending_hex}{}", HexFmt(bytes))?;
        // The hex buffer is now empty, which is right: everything so far
        // has been written and the output ends within the hex literal.

        // To continue in unq form, that literal has to be closed first;
        // afterwards we are just past an expression.
        let unq = &mut self.enc.unq;
        unq.clear();
        unq.push_str("' AS TEXT)"); // CAST
        self.unq_state = UnqState::AfterExpr;

        Ok(())
    }

    pub fn new(w: W) -> Self {
        FragmentWriter {
            w,
            enc: ByTactic {
                tactic_is: PreviousTactic,
                unq: "".into(),
                hex: "CAST(x'".into(), // CAST
            },
            unq_state: UnqState::BeforeExpr,
        }
    }

    pub fn finish(mut self) -> Result<(), E> {
        use UnqState as U;

        let proposals = ByTactic {
            tactic_is: PreviousTactic,
            hex: ["' AS TEXT)"].into(), //CAST
            unq: [match self.unq_state {
                U::AfterExpr => "",
                U::BeforeExpr => "''",
                U::InSquote => "'",
            }].into(),
        };

        let use_prev_tactic = self.best_prev_tactic(&proposals);

        write!(
            self.w, "{}{}",
            self.enc[use_prev_tactic],
            proposals[use_prev_tactic],
        )?;

        Ok(())
    }
}

// Template generating the helper methods of `ByTactic` (and
// `Tactic::change_meaning`) from the fields of the struct it is applied to.
//
// Only the fields whose type is `D` take part; the marker field
// `tactic_is` is handled separately.
define_derive_deftly! {
    ByTactic expect items:

    // Condition: this field is one of the per-tactic data fields.
    ${defcond D approx_equal($ftype, D)}
    // Initialiser copying the marker from `self` into a new `ByTactic`.
    ${define COPY_M { tactic_is: self.tactic_is, }}
    // The `Tactic` variant named after the current field.
    ${define TACTIC { Tactic::${pascal_case $fname} }}

    impl<M: Copy, D> ByTactic<M, D> {
        // Build a table by calling `f` once per tactic.
        pub fn from_fn(tactic_is: M, mut f: impl FnMut(Tactic<M>) -> D)
                       -> Self
        {
            ByTactic {
                tactic_is,
                $( ${when D} $fname: f($TACTIC(tactic_is)), )
            }
        }
        // Table of shared references to the entries.
        pub fn as_ref(&self) -> ByTactic<M, &D> {
            ByTactic {
                $( ${when D} $fname: &self.$fname, )
                $COPY_M
            }
        }
        // Table of mutable references to the entries.
        pub fn as_mut(&mut self) -> ByTactic<M, &mut D> {
            ByTactic {
                $( ${when D} $fname: &mut self.$fname, )
                $COPY_M
            }
        }
        // Transform every entry with `f`, keeping the marker.
        pub fn map<U>(self, mut f: impl FnMut(D) -> U) -> ByTactic<M, U> {
            ByTactic { 
                $( ${when D} $fname: f(self.$fname), )
                $COPY_M
            }
        }
        // Consume the table, yielding each entry together with its tactic.
        pub fn iter(self) -> impl Iterator<Item = (Tactic<M>, D)> {
            [$(
                ${when D}
                ($TACTIC(self.tactic_is), self.$fname),
            )].into_iter()
        }
    }

    // `table[tactic]` gives the entry for that tactic;
    // the marker value inside the `Tactic` is ignored.
    impl<M: Copy, D> ops::Index<Tactic<M>> for ByTactic<M, D> {
        type Output = D;
        fn index(&self, t: Tactic<M>) -> &D {
            match t { $(
                ${when D}
                $TACTIC(..) => &self.$fname,
            ) }
        }
    }

    impl<M> Tactic<M> {
        // Same variant, but with the marker converted by `f`.
        fn change_meaning<N>(self, f: impl Fn(M) -> N) -> Tactic<N> {
            match self { ${for fields {
                ${when D}
                $TACTIC(m) => $TACTIC(f(m)),
            }} }
        }
    }
}
use derive_deftly_template_ByTactic;