difference_rs/
lib.rs

1//! Functions to find the difference between two texts (strings).
2//!
3//! Usage
4//! ----------
5//!
6//! Add the following to your `Cargo.toml`:
7//!
8//! ```toml
9//! [dependencies]
10//! difference_rs = "3.0"
11//! ```
12//!
//! Now you can use the crate in your code.
14//!
15//!
16//! ## Examples
17//!
18//! See [Examples.md](Examples.md) for more examples.
19//!
20//! ```rust
21//! use difference_rs::{Difference, Changeset};
22//!
23//! let changeset = Changeset::new("test", "tent", "");
24//!
25//! assert_eq!(changeset.diffs, vec![
26//!   Difference::Same("te".to_string()),
27//!   Difference::Rem("s".to_string()),
28//!   Difference::Add("n".to_string()),
29//!   Difference::Same("t".to_string())
30//! ]);
31//! ```
32
33#![crate_name = "difference_rs"]
34#![doc(html_root_url = "http://docs.rs/difference-rs")]
35#![deny(missing_docs)]
36#![deny(warnings)]
37
38mod display;
39mod lcs;
40mod merge;
41mod multi;
42
43use std::char::REPLACEMENT_CHARACTER;
44
45use crate::lcs::lcs;
46use crate::merge::merge;
47
/// A single entry of a changeset.
///
/// Entries are delivered in order of appearance in the original string, and
/// sequences of the same kind are grouped into one `Difference`.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Difference {
    /// A sequence that is the same in both strings.
    Same(String),
    /// A sequence that was added (it does not appear in the first string).
    Add(String),
    /// A sequence that was removed (it does not appear in the second string).
    Rem(String),
}
61
62/// The information about a full changeset
63#[derive(Clone, Debug, PartialEq, Eq)]
64#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
65pub struct Changeset {
66    /// An ordered vector of `Difference` objects, corresponding
67    /// to the differences within the text
68    pub diffs: Vec<Difference>,
69    /// The split used when creating the `Changeset`
70    /// Common splits are `""` for char-level, `" "` for word-level and `"\n"` for line-level.
71    pub split: String,
72    /// The edit distance of the `Changeset`
73    pub distance: i128,
74}
75
76/// The information about a full changeset when regarding a multi split changeset
77#[derive(Clone, Debug, PartialEq, Eq)]
78#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
79pub struct ChangesetMulti {
80    /// An ordered vector of `Difference` objects, corresponding
81    /// to the differences within the text
82    pub diffs: Vec<Difference>,
83    /// The splits used when creating the `Changeset` with their respective indexes in the origin string.
84    pub splits: Vec<(usize, String)>,
85    /// The splits used when creating the `Changeset` with their respective indexes in the edit string.
86    pub edit_splits: Vec<(usize, String)>,
87    /// The edit distance of the `Changeset`
88    pub distance: i128,
89}
90
91impl Changeset {
92    /// Calculates the edit distance and the changeset for two given strings.
93    /// The first string is assumed to be the "original", the second to be an
94    /// edited version of the first. The third parameter specifies how to split
95    /// the input strings, leading to a more or less exact comparison.
96    ///
97    /// Common splits are `""` for char-level, `" "` for word-level and `"\n"` for line-level.
98    ///
99    /// Outputs the edit distance (how much the two strings differ) and a "changeset", that is
100    /// a `Vec` containing `Difference`s.
101    ///
102    /// # Examples
103    ///
104    /// ```
105    /// use difference_rs::{Changeset, Difference};
106    ///
107    /// let changeset = Changeset::new("test", "tent", "");
108    ///
109    /// assert_eq!(changeset.diffs, vec![
110    ///     Difference::Same("te".to_string()),
111    ///     Difference::Rem("s".to_string()),
112    ///     Difference::Add("n".to_string()),
113    ///     Difference::Same("t".to_string())
114    /// ]);
115    /// ```
116    #[must_use]
117    pub fn new(orig: &str, edit: &str, split: &str) -> Changeset {
118        let (dist, common) = lcs(orig, edit, split);
119        Changeset {
120            diffs: merge(orig, edit, &common, split),
121            split: split.to_string(),
122            distance: dist,
123        }
124    }
125
126    /// Creates a `Changeset` with multiple possible splits.
127    /// The first string is assumed to be the "original", the second to be an
128    /// edited version of the first. The third parameter specifies how to split
129    /// the input strings, leading to a more or less exact comparison.
130    ///
131    /// Outputs the edit distance (how much the two strings differ), original string splits, edit string splits, a "changeset", that is
132    /// a `Vec` containing `Difference`s.
133    ///
134    /// Obs: Splits are included inside the `Difference` vector, as it is the only way to correctly rebuild strings, which differs from
135    /// `Changeset::new` that all spaces are filled by a single split.
136    ///
137    /// # Examples
138    ///
139    /// ```
140    /// use difference_rs::{Changeset, Difference};
141    ///
142    /// let changeset = Changeset::new_multi(
143    ///    "https://localhost:8080/path?query=value",
144    ///    "https://myapi.com/api/path?query=asset",
145    ///    &["://", "/", "?", "="],
146    /// );
147    ///
148    /// assert_eq!(changeset.diffs, vec![
149    ///     Difference::Same("https://".to_string()),
150    ///     Difference::Rem("localhost:8080/".to_string()),
151    ///     Difference::Add("myapi.com/api/".to_string()),
152    ///     Difference::Same("path?query=".to_string()),
153    ///     Difference::Rem("value".to_string()),
154    ///     Difference::Add("asset".to_string()),
155    /// ]);
156    /// ```
157    #[must_use]
158    pub fn new_multi(orig: &str, edit: &str, splits: &[&str]) -> ChangesetMulti {
159        let matched_splits = splits
160            .iter()
161            .flat_map(|split| orig.match_indices(*split))
162            .map(|(k, v)| (k, v.to_string()))
163            .collect::<Vec<(usize, String)>>();
164        let edit_splits = splits
165            .iter()
166            .flat_map(|split| edit.match_indices(*split))
167            .map(|(k, v)| (k, v.to_string()))
168            .collect::<Vec<(usize, String)>>();
169
170        let mut aux_orig = orig.to_string();
171        let mut aux_edit = edit.to_string();
172        let replacement = REPLACEMENT_CHARACTER.to_string();
173        for split in splits {
174            aux_orig = aux_orig.replace(split, &replacement);
175            aux_edit = aux_edit.replace(split, &replacement);
176        }
177
178        let changeset = Changeset::new(&aux_orig, &aux_edit, &replacement);
179        ChangesetMulti::from((changeset, matched_splits, edit_splits))
180    }
181}
182
/// Asserts that the edit distance between two strings equals an expected value.
///
/// Takes the original text, the edited text, the split used to compare them and, as
/// fourth parameter, the expected edit distance (e.g. 0 if you want to test for equality).
///
/// Remember that edit distance might not be equal to your understanding of difference,
/// for example the words "Rust" and "Dust" have an edit distance of 2 because two changes (a
/// removal and an addition) are required to make them look the same.
///
/// On failure the changeset is printed to stdout and the macro panics with a message
/// naming both inputs, the actual distance and the expected one.
///
/// Every argument is evaluated exactly once.
#[macro_export]
macro_rules! assert_diff {
    ($orig:expr_2021 , $edit:expr_2021, $split: expr_2021, $expected: expr_2021) => {{
        let orig = $orig;
        let edit = $edit;

        let changeset = $crate::Changeset::new(orig, edit, &($split));
        // Bound once so an argument with side effects is not evaluated a second time
        // while the failure message is formatted.
        let expected = $expected;
        if changeset.distance != expected {
            println!("{}", changeset);
            panic!(
                "assertion failed: edit distance between {:?} and {:?} is {} and not {}, see \
                    diffset above",
                orig,
                edit,
                changeset.distance,
                expected
            )
        }
    }};
}
212
/// Line-level diff of two four-line texts in which the second and fourth lines differ.
#[test]
fn test_diff() {
    let original = "Roses are red, violets are blue,\n\
                    I wrote this library,\n\
                    just for you.\n\
                    (It's true).";

    let edited = "Roses are red, violets are blue,\n\
                  I wrote this documentation,\n\
                  just for you.\n\
                  (It's quite true).";

    let Changeset {
        diffs, distance, ..
    } = Changeset::new(original, edited, "\n");

    let expected = vec![
        Difference::Same(String::from("Roses are red, violets are blue,")),
        Difference::Rem(String::from("I wrote this library,")),
        Difference::Add(String::from("I wrote this documentation,")),
        Difference::Same(String::from("just for you.")),
        Difference::Rem(String::from("(It's true).")),
        Difference::Add(String::from("(It's quite true).")),
    ];

    assert_eq!(distance, 4);
    assert_eq!(diffs, expected);
}
241
/// Two texts without a common line yield one removal followed by one addition.
#[test]
fn test_diff_brief() {
    let expected = vec![
        Difference::Rem(String::from("Hello\nworld")),
        Difference::Add(String::from("Ola\nmundo")),
    ];

    let actual = Changeset::new("Hello\nworld", "Ola\nmundo", "\n").diffs;

    assert_eq!(actual, expected);
}
257
/// Line-level diff where the original has fewer lines than the edited text.
///
/// The diff assertion runs in every build; only the JSON assertion depends on the
/// optional `serde` feature.
#[test]
fn test_diff_smaller_line_count_on_left() {
    let text1 = "Hello\nworld";
    let text2 = "Ola\nworld\nHow is it\ngoing?";

    let changeset = Changeset::new(text1, text2, "\n");

    assert_eq!(
        changeset.diffs,
        vec![
            Difference::Rem("Hello".to_string()),
            Difference::Add("Ola".to_string()),
            Difference::Same("world".to_string()),
            Difference::Add("How is it\ngoing?".to_string()),
        ]
    );

    // Gating just this part keeps the assertion above active when the crate is
    // built without the `serde` feature.
    #[cfg(feature = "serde")]
    {
        let json = serde_json::to_string(&changeset).unwrap();

        assert_eq!(
            json,
            r#"{"diffs":[{"Rem":"Hello"},{"Add":"Ola"},{"Same":"world"},{"Add":"How is it\ngoing?"}],"split":"\n","distance":4}"#
        );
    }
}
283
/// Line-level diff where the edited text has fewer lines than the original.
#[test]
fn test_diff_smaller_line_count_on_right() {
    let original = "Hello\nworld\nWhat a \nbeautiful\nday!";
    let edited = "Ola\nworld";

    let expected = vec![
        Difference::Rem(String::from("Hello")),
        Difference::Add(String::from("Ola")),
        Difference::Same(String::from("world")),
        Difference::Rem(String::from("What a \nbeautiful\nday!")),
    ];

    let actual = Changeset::new(original, edited, "\n").diffs;

    assert_eq!(actual, expected);
}
301
/// A line that differs only in letter case counts as changed, so everything after the
/// shared first line is reported as one removal and one addition.
#[test]
fn test_diff_similar_text_with_smaller_line_count_on_right() {
    let original = "Hello\nworld\nWhat a \nbeautiful\nday!";
    let edited = "Hello\nwoRLd";

    let expected = vec![
        Difference::Same(String::from("Hello")),
        Difference::Rem(String::from("world\nWhat a \nbeautiful\nday!")),
        Difference::Add(String::from("woRLd")),
    ];

    let actual = Changeset::new(original, edited, "\n").diffs;

    assert_eq!(actual, expected);
}
318
/// Shared lines at the start and in the middle are kept as `Same`, with the changed
/// lines between and after them reported as removals and additions.
#[test]
fn test_diff_similar_text_with_similar_line_count() {
    let original = "Hello\nworld\nWhat a \nbeautiful\nday!";
    let edited = "Hello\nwoRLd\nbeautiful";

    let expected = vec![
        Difference::Same(String::from("Hello")),
        Difference::Rem(String::from("world\nWhat a ")),
        Difference::Add(String::from("woRLd")),
        Difference::Same(String::from("beautiful")),
        Difference::Rem(String::from("day!")),
    ];

    let actual = Changeset::new(original, edited, "\n").diffs;

    assert_eq!(actual, expected);
}
337
/// `assert_diff!` must panic with its full message when the distance is not the expected one.
///
/// The split `"\n'"` does not occur in either text, and the pinned message reports a
/// distance of 2.
#[test]
#[should_panic = r#"assertion failed: edit distance between "Roses are red, violets are blue,\nI wrote this library,\njust for you.\n(It's true)." and "Roses are red, violets are blue,\nI wrote this documentation,\njust for you.\n(It's quite true)." is 2 and not 0, see diffset above"#]
fn test_assert_diff_panic() {
    let original = "Roses are red, violets are blue,\n\
                    I wrote this library,\n\
                    just for you.\n\
                    (It's true).";

    let edited = "Roses are red, violets are blue,\n\
                  I wrote this documentation,\n\
                  just for you.\n\
                  (It's quite true).";

    assert_diff!(original, edited, "\n'", 0);
}
353
/// `assert_diff!` passes when the word-level distance matches: one word replaced gives 2.
#[test]
fn test_assert_diff() {
    let original = "Roses are red, violets are blue";
    let edited = "Roses are green, violets are blue";

    assert_diff!(original, edited, " ", 2);
}
362
/// Multi-split diff with `","` and `" "` as delimiters, checked against the full result.
#[test]
fn test_multi_pattern() {
    let diffs = vec![
        Difference::Rem(String::from("hello,")),
        Difference::Add(String::from("hellow,")),
        Difference::Same(String::from("world ")),
        Difference::Rem(String::from("now")),
        Difference::Add(String::from("later")),
    ];
    let splits = vec![(5, String::from(",")), (11, String::from(" "))];
    let edit_splits = vec![(6, String::from(",")), (12, String::from(" "))];
    let expected = ChangesetMulti {
        diffs,
        splits,
        edit_splits,
        distance: 4,
    };

    let actual = Changeset::new_multi("hello,world now", "hellow,world later", &[",", " "]);

    assert_eq!(actual, expected);
}
381
/// Multi-split diff of two URIs; the recorded splits are grouped by pattern, so the
/// `"/"` hits inside `"://"` are listed too.
#[test]
fn test_multi_uri_pattern() {
    // Turns `(offset, pattern)` literals into the owned pairs stored in `ChangesetMulti`.
    let owned = |pairs: &[(usize, &str)]| -> Vec<(usize, String)> {
        pairs
            .iter()
            .map(|&(at, pattern)| (at, pattern.to_string()))
            .collect()
    };

    let expected = ChangesetMulti {
        diffs: vec![
            Difference::Same(String::from("https://")),
            Difference::Rem(String::from("localhost:8080/")),
            Difference::Add(String::from("myapi.com/api/")),
            Difference::Same(String::from("path?query=")),
            Difference::Rem(String::from("value")),
            Difference::Add(String::from("asset")),
        ],
        splits: owned(&[
            (5, "://"),
            (6, "/"),
            (7, "/"),
            (22, "/"),
            (27, "?"),
            (33, "="),
        ]),
        edit_splits: owned(&[
            (5, "://"),
            (6, "/"),
            (7, "/"),
            (17, "/"),
            (21, "/"),
            (26, "?"),
            (32, "="),
        ]),
        distance: 5,
    };

    let actual = Changeset::new_multi(
        "https://localhost:8080/path?query=value",
        "https://myapi.com/api/path?query=asset",
        &["://", "/", "?", "="],
    );

    assert_eq!(actual, expected);
}
417
418    assert_eq!(cg, expected);
419}