lb-rs 26.4.13

The Rust library for interacting with your lockbook.
Documentation
pub mod buffer;
pub mod offset_types;
pub mod operation_types;
pub mod unicode_segs;

use offset_types::{DocByteOffset, RangeExt as _};
use operation_types::Replace;

use similar::DiffableStrRef as _;
use unicode_segmentation::UnicodeSegmentation as _;

/// Computes the edits that turn `from` into `to`.
///
/// Both inputs are split at Unicode word boundaries and compared with the Myers algorithm, so
/// every edit covers whole words. Each non-equal diff operation becomes a [`Replace`] whose
/// `range` addresses `from` (using offsets produced by `offset_to_char`) and whose `text` is
/// the slice of `to` that takes its place. An operation that starts exactly where the
/// previously emitted edit ends is folded into that edit instead of being pushed separately.
///
/// Returns the edits in the order the diff reports them; the result is empty when the diff
/// consists only of equal runs.
pub fn diff(from: &str, to: &str) -> Vec<Replace> {
    let from_segs = unicode_segs::calc(from);
    let to_segs = unicode_segs::calc(to);

    // Byte offset of every word start, followed by the total length so that
    // `index + len` can also address the end of the final word.
    let from_words: Vec<_> = from
        .split_word_bound_indices()
        .map(|(byte_idx, _)| DocByteOffset(byte_idx))
        .chain(std::iter::once(DocByteOffset(from.len())))
        .collect();
    let to_words: Vec<_> = to
        .split_word_bound_indices()
        .map(|(byte_idx, _)| DocByteOffset(byte_idx))
        .chain(std::iter::once(DocByteOffset(to.len())))
        .collect();

    // Word index -> offset in the respective document, via the segment tables.
    let from_char = |word: usize| from_segs.offset_to_char(from_words[word]);
    let to_char = |word: usize| to_segs.offset_to_char(to_words[word]);

    // Span of `from` covered by `count` words starting at word `first`.
    let old_span = |first: usize, count: usize| {
        let len = from_char(first + count) - from_char(first);
        let start = from_char(first);
        (start, start + len)
    };

    // Text of `to` covered by `count` words starting at word `first`.
    let new_text = |first: usize, count: usize| {
        let len = to_char(first + count) - to_char(first);
        let start = to_char(first);
        let bytes = to_segs.range_to_byte((start, start + len));
        to[bytes.start().0..bytes.end().0].to_string()
    };

    // NOTE(review): the op indices are used directly as indices into the word tables above,
    // which assumes `diff_unicode_words` tokenizes the same way as
    // `split_word_bound_indices` — confirm against the `similar` documentation.
    let word_diff = similar::TextDiff::configure()
        .algorithm(similar::Algorithm::Myers)
        .diff_unicode_words(from.as_diffable_str(), to.as_diffable_str());

    let mut edits: Vec<Replace> = Vec::new();
    for op in word_diff.ops().iter().cloned() {
        // Reduce every operation to "replace start..end of `from` with `inserted`".
        let (start, end, inserted) = match op {
            similar::DiffOp::Equal { .. } => continue,
            similar::DiffOp::Delete { old_index, old_len, .. } => {
                let (start, end) = old_span(old_index, old_len);
                (start, end, String::new())
            }
            similar::DiffOp::Insert { old_index, new_index, new_len } => {
                // Pure insertion: an empty range at the insertion point.
                let at = from_char(old_index);
                (at, at, new_text(new_index, new_len))
            }
            similar::DiffOp::Replace { old_index, old_len, new_index, new_len } => {
                let (start, end) = old_span(old_index, old_len);
                (start, end, new_text(new_index, new_len))
            }
        };

        match edits.last_mut() {
            // Contiguous with the previous edit: grow it rather than emitting a new one.
            Some(Replace { range, text }) if range.1 == start => {
                range.1 = end;
                text.push_str(&inserted);
            }
            _ => edits.push(Replace { range: (start, end), text: inserted }),
        }
    }
    edits
}

#[cfg(test)]
mod test {
    use rand::rngs::StdRng;
    use rand::{Rng as _, SeedableRng as _};

    /// A single word replaced by a different word yields one edit spanning the whole input.
    #[test]
    fn diff_full_replace() {
        let from = "Hello";
        let to = "Goodbye";

        let result = super::diff(from, to);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].range, (0.into(), 5.into()));
        assert_eq!(result[0].text, "Goodbye");
    }

    /// Only the changed word is replaced; the surrounding text is left alone.
    #[test]
    fn diff_partial_replace() {
        let from = "Hello, world!";
        let to = "Hello, Rust!";

        let result = super::diff(from, to);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].range, (7.into(), 12.into()));
        assert_eq!(result[0].text, "Rust");
    }

    /// Runs `diff` over 1000 pairs of short random strings; the test passes as long as
    /// nothing panics.
    #[test]
    fn diff_fuzz() {
        let mut rng = StdRng::seed_from_u64(0);
        for _ in 0..1000 {
            // Lengths come from the seeded generator as well, so a failing run can be
            // reproduced exactly; drawing them from `rand::random` would defeat the seed.
            let from_len = rng.gen_range(0..10);
            let from = rand_str(&mut rng, from_len);
            let to_len = rng.gen_range(0..10);
            let to = rand_str(&mut rng, to_len);
            let _ = super::diff(&from, &to);
        }
    }

    /// Builds a string of `length` code points drawn from `rng` in the range
    /// U+0020..=U+D7FF, substituting `'?'` should a value not be a valid `char`.
    fn rand_str(rng: &mut StdRng, length: usize) -> String {
        (0..length)
            .map(|_| {
                let code_point = rng.gen_range(0x0020..=0xD7FF);
                std::char::from_u32(code_point).unwrap_or('?')
            })
            .collect()
    }
}