csv-diff 0.1.2

Compare two CSVs - with ludicrous speed 🚀.
Documentation
use std::cmp::Ordering;

#[derive(Debug, PartialEq, Clone)]
pub enum DiffByteRecord {
    Add(ByteRecordLineInfo),
    Modify {
        delete: ByteRecordLineInfo,
        add: ByteRecordLineInfo,
        field_indices: Vec<usize>,
    },
    Delete(ByteRecordLineInfo),
}

impl DiffByteRecord {
    pub fn line_num(&self) -> LineNum {
        match self {
            Self::Add(rli) | Self::Delete(rli) => LineNum::OneSide(rli.line),
            Self::Modify {
                delete: deleted,
                add: added,
                ..
            } => LineNum::BothSides {
                for_deleted: deleted.line,
                for_added: added.line,
            },
        }
    }

    pub(crate) fn cmp_by_line(&self, other: &DiffByteRecord) -> Ordering {
        match (self.line_num(), other.line_num()) {
            (LineNum::OneSide(line_num_a), LineNum::OneSide(line_num_b)) => line_num_a
                .cmp(&line_num_b)
                .then(if matches!(self, DiffByteRecord::Delete(..)) {
                    Ordering::Less
                } else {
                    Ordering::Greater
                }),
            (
                LineNum::OneSide(line_num_a),
                LineNum::BothSides {
                    for_deleted: modify_line_del,
                    for_added: modify_line_add,
                },
            ) => line_num_a
                .cmp(if modify_line_del < modify_line_add {
                    &modify_line_del
                } else {
                    &modify_line_add
                })
                .then(if matches!(self, DiffByteRecord::Delete(..)) {
                    Ordering::Less
                } else {
                    Ordering::Greater
                }),
            (
                LineNum::BothSides {
                    for_deleted: modify_line_del,
                    for_added: modify_line_add,
                },
                LineNum::OneSide(line_num_b),
            ) => if modify_line_del < modify_line_add {
                &modify_line_del
            } else {
                &modify_line_add
            }
            .cmp(&line_num_b)
            .then(if matches!(other, DiffByteRecord::Add(..)) {
                Ordering::Less
            } else {
                Ordering::Greater
            }),
            (
                LineNum::BothSides {
                    for_deleted: modify_line_del_a,
                    for_added: modify_line_add_a,
                },
                LineNum::BothSides {
                    for_deleted: modify_line_del_b,
                    for_added: modify_line_add_b,
                },
            ) => if modify_line_del_a < modify_line_add_a {
                &modify_line_del_a
            } else {
                &modify_line_add_a
            }
            .cmp(if modify_line_del_b < modify_line_add_b {
                &modify_line_del_b
            } else {
                &modify_line_add_b
            })
            // this will _never_ compare to `Ordering::Equal`, because
            // one and the same line on one side of the comparison
            // can't be in two different `DiffByteRecord`s
            .then(modify_line_del_a.cmp(&modify_line_del_b)),
        }
    }
}

#[cfg(kani)]
impl kani::Arbitrary for DiffByteRecord {
    fn any() -> Self {
        match kani::any() {
            0 => Self::Add(kani::any()),
            1 => Self::Delete(kani::any()),
            _ => Self::Modify {
                delete: kani::any(),
                add: kani::any(),
                field_indices: vec![],
            },
        }
    }
}

#[cfg_attr(kani, derive(kani::Arbitrary))]
pub enum LineNum {
    OneSide(u64),
    BothSides { for_deleted: u64, for_added: u64 },
}

#[derive(Debug, PartialEq, Clone)]
pub struct ByteRecordLineInfo {
    byte_record: csv::ByteRecord,
    line: u64,
}

impl ByteRecordLineInfo {
    pub fn new(byte_record: csv::ByteRecord, line: u64) -> Self {
        Self { byte_record, line }
    }

    pub fn byte_record(&self) -> &csv::ByteRecord {
        &self.byte_record
    }

    pub fn into_byte_record(self) -> csv::ByteRecord {
        self.byte_record
    }

    pub fn line(&self) -> u64 {
        self.line
    }
}

#[cfg(kani)]
impl kani::Arbitrary for ByteRecordLineInfo {
    fn any() -> Self {
        Self {
            byte_record: csv::ByteRecord::from(kani::vec::any_vec::<[u8; 1], 3>()), //csv::ByteRecord::default(),
            line: kani::any(),
        }
    }
}

#[cfg(kani)]
mod verification {
    use super::*;

    #[kani::proof]
    #[kani::unwind(4)]
    pub fn check_less_than_line_num_one_side() {
        let (a, b): (DiffByteRecord, DiffByteRecord) = (kani::any(), kani::any());
        kani::assume(
            matches!(a.line_num(), LineNum::OneSide(_))
                && matches!(b.line_num(), LineNum::OneSide(_)),
        );
        match (a.line_num(), b.line_num()) {
            (LineNum::OneSide(line_a), LineNum::OneSide(line_b)) => {
                kani::assume(line_a < line_b && line_a > 0 && line_b > 0)
            }
            _ => unreachable!("kani::assume should prevent this arm!"),
        };
        assert!(a.cmp_by_line(&b) == Ordering::Less);
    }

    #[kani::proof]
    #[kani::unwind(4)]
    pub fn check_greater_than_line_num_one_side() {
        let (a, b): (DiffByteRecord, DiffByteRecord) = (kani::any(), kani::any());
        kani::assume(
            matches!(a.line_num(), LineNum::OneSide(_))
                && matches!(b.line_num(), LineNum::OneSide(_)),
        );
        match (a.line_num(), b.line_num()) {
            (LineNum::OneSide(line_a), LineNum::OneSide(line_b)) => {
                kani::assume(line_a > line_b && line_a > 0 && line_b > 0)
            }
            _ => unreachable!("kani::assume should prevent this arm!"),
        };
        assert!(a.cmp_by_line(&b) == Ordering::Greater);
    }

    #[kani::proof]
    #[kani::unwind(4)]
    pub fn check_never_compare_to_equal() {
        let (a, b): (DiffByteRecord, DiffByteRecord) = (kani::any(), kani::any());

        kani_assume_certain_lines_cannot_be_equal(&a, &b);

        assert!(a.cmp_by_line(&b) != Ordering::Equal);
    }

    fn kani_assume_certain_lines_cannot_be_equal(a: &DiffByteRecord, b: &DiffByteRecord) {
        match (a, b) {
            (DiffByteRecord::Add(line_info_a), DiffByteRecord::Add(line_info_b))
            | (DiffByteRecord::Delete(line_info_a), DiffByteRecord::Delete(line_info_b)) => {
                kani::assume(line_info_a.line() != line_info_b.line());
            }
            (
                DiffByteRecord::Add(byte_record_line_info_add),
                DiffByteRecord::Modify {
                    delete: _,
                    add,
                    field_indices: _,
                },
            )
            | (
                DiffByteRecord::Modify {
                    delete: _,
                    add,
                    field_indices: _,
                },
                DiffByteRecord::Add(byte_record_line_info_add),
            ) => {
                kani::assume(byte_record_line_info_add.line() != add.line());
            }
            (
                DiffByteRecord::Modify {
                    delete: delete_a,
                    add: add_a,
                    ..
                },
                DiffByteRecord::Modify {
                    delete: delete_b,
                    add: add_b,
                    ..
                },
            ) => {
                kani::assume(delete_a.line() != delete_b.line() && add_a.line() != add_b.line());
            }
            (
                DiffByteRecord::Modify {
                    delete,
                    add: _,
                    field_indices: _,
                },
                DiffByteRecord::Delete(line_info_delete),
            )
            | (
                DiffByteRecord::Delete(line_info_delete),
                DiffByteRecord::Modify {
                    delete,
                    add: _,
                    field_indices: _,
                },
            ) => {
                kani::assume(delete.line() != line_info_delete.line());
            }
            _ => {
                // no assumptions for the other cases
            }
        }
    }

    #[kani::proof]
    #[kani::unwind(4)]
    pub fn check_comparison_one_side_with_both_sides() {
        let (a, b): (DiffByteRecord, DiffByteRecord) = (kani::any(), kani::any());
        kani::assume(matches!(
            (a.line_num(), b.line_num()),
            (LineNum::OneSide(_), LineNum::BothSides { .. })
                | (LineNum::BothSides { .. }, LineNum::OneSide(_))
        ));
        match (a.line_num(), b.line_num()) {
            (
                LineNum::OneSide(line_a),
                LineNum::BothSides {
                    for_deleted: line_both_del,
                    for_added: line_both_add,
                },
            ) => {
                kani::assume(line_a > 0 && line_both_del > 0 && line_both_add > 0);
                match line_a.cmp(if line_both_del < line_both_add {
                    &line_both_del
                } else {
                    &line_both_add
                }) {
                    o @ Ordering::Less | o @ Ordering::Greater => assert!(a.cmp_by_line(&b) == o),
                    Ordering::Equal => assert!(if matches!(a, DiffByteRecord::Delete(_)) {
                        a.cmp_by_line(&b) == Ordering::Less
                    } else {
                        a.cmp_by_line(&b) == Ordering::Greater
                    }),
                }
            }
            (
                LineNum::BothSides {
                    for_deleted: line_both_del,
                    for_added: line_both_add,
                },
                LineNum::OneSide(line_b),
            ) => {
                kani::assume(line_b > 0 && line_both_del > 0 && line_both_add > 0);
                match line_b.cmp(if line_both_del < line_both_add {
                    &line_both_del
                } else {
                    &line_both_add
                }) {
                    o @ Ordering::Less | o @ Ordering::Greater => assert!(b.cmp_by_line(&a) == o),
                    Ordering::Equal => assert!(if matches!(b, DiffByteRecord::Add(_)) {
                        a.cmp_by_line(&b) == Ordering::Less
                    } else {
                        a.cmp_by_line(&b) == Ordering::Greater
                    }),
                }
            }
            _ => unreachable!("kani::assume should prevent this arm"),
        };
    }

    #[kani::proof]
    #[kani::unwind(5)]
    pub fn check_comparison_both_sides_with_both_sides() {
        let (a, b): (DiffByteRecord, DiffByteRecord) = (kani::any(), kani::any());
        kani::assume(matches!(
            (a.line_num(), b.line_num()),
            (LineNum::BothSides { .. }, LineNum::BothSides { .. })
        ));
        match (a.line_num(), b.line_num()) {
            (
                LineNum::BothSides {
                    for_deleted: line_both_del_a,
                    for_added: line_both_add_a,
                },
                LineNum::BothSides {
                    for_deleted: line_both_del_b,
                    for_added: line_both_add_b,
                },
            ) => {
                kani::assume(
                    [
                        line_both_add_a,
                        line_both_del_a,
                        line_both_add_b,
                        line_both_del_b,
                    ]
                    .into_iter()
                    .all(|x| x > 0)
                        && [
                            (line_both_add_a, line_both_add_b),
                            (line_both_del_a, line_both_del_b),
                        ]
                        .into_iter()
                        .all(|(a, b)| a != b),
                );
                match if line_both_add_a < line_both_del_a {
                    line_both_add_a
                } else {
                    line_both_del_a
                }
                .cmp(if line_both_add_b < line_both_del_b {
                    &line_both_add_b
                } else {
                    &line_both_del_b
                }) {
                    o @ Ordering::Less | o @ Ordering::Greater => {
                        assert!(a.cmp_by_line(&b) == o)
                    }
                    Ordering::Equal => {
                        assert!(line_both_del_a.cmp(&line_both_del_b) == a.cmp_by_line(&b))
                    }
                }
            }
            _ => unreachable!("kani::assume should prevent this arm"),
        };
    }
}