difference_rs/
lib.rs

1//! Functions to find the difference between two texts (strings).
2//!
3//! Usage
4//! ----------
5//!
6//! Add the following to your `Cargo.toml`:
7//!
8//! ```toml
9//! [dependencies]
10//! difference_rs = "3.0"
11//! ```
12//!
13//! Now you can use the crate in your code
14//!
15//!
16//! ## Examples
17//!
18//! See [Examples.md](Examples.md) for more examples.
19//!
20//! ```rust
21//! use difference_rs::{Difference, Changeset};
22//!
23//! let changeset = Changeset::new("test", "tent", "");
24//!
25//! assert_eq!(changeset.diffs, vec![
26//!   Difference::Same("te".to_string()),
27//!   Difference::Rem("s".to_string()),
28//!   Difference::Add("n".to_string()),
29//!   Difference::Same("t".to_string())
30//! ]);
31//! ```
32
33#![crate_name = "difference_rs"]
34#![doc(html_root_url = "http://docs.rs/difference-rs")]
35#![deny(missing_docs)]
36#![deny(warnings)]
37
38mod display;
39mod lcs;
40mod merge;
41mod multi;
42
43use std::char::REPLACEMENT_CHARACTER;
44
45use crate::lcs::lcs;
46use crate::merge::merge;
47
/// One entry of a changeset.
///
/// Entries are delivered in order of appearance in the original string, and
/// consecutive sequences of the same kind are grouped into a single `Difference`.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Difference {
    /// A sequence that is the same in both strings.
    Same(String),
    /// A sequence that was added (it does not appear in the first string).
    Add(String),
    /// A sequence that was removed (it does not appear in the second string).
    Rem(String),
}
60
61/// The information about a full changeset
62#[derive(Clone, Debug, PartialEq, Eq)]
63pub struct Changeset {
64    /// An ordered vector of `Difference` objects, corresponding
65    /// to the differences within the text
66    pub diffs: Vec<Difference>,
67    /// The split used when creating the `Changeset`
68    /// Common splits are `""` for char-level, `" "` for word-level and `"\n"` for line-level.
69    pub split: String,
70    /// The edit distance of the `Changeset`
71    pub distance: i128,
72}
73
74/// The information about a full changeset when regarding a multi split changeset
75#[derive(Clone, Debug, PartialEq, Eq)]
76pub struct ChangesetMulti {
77    /// An ordered vector of `Difference` objects, corresponding
78    /// to the differences within the text
79    pub diffs: Vec<Difference>,
80    /// The splits used when creating the `Changeset` with their respective indexes in the origin string.
81    pub splits: Vec<(usize, String)>,
82    /// The splits used when creating the `Changeset` with their respective indexes in the edit string.
83    pub edit_splits: Vec<(usize, String)>,
84    /// The edit distance of the `Changeset`
85    pub distance: i128,
86}
87
88impl Changeset {
89    /// Calculates the edit distance and the changeset for two given strings.
90    /// The first string is assumed to be the "original", the second to be an
91    /// edited version of the first. The third parameter specifies how to split
92    /// the input strings, leading to a more or less exact comparison.
93    ///
94    /// Common splits are `""` for char-level, `" "` for word-level and `"\n"` for line-level.
95    ///
96    /// Outputs the edit distance (how much the two strings differ) and a "changeset", that is
97    /// a `Vec` containing `Difference`s.
98    ///
99    /// # Examples
100    ///
101    /// ```
102    /// use difference_rs::{Changeset, Difference};
103    ///
104    /// let changeset = Changeset::new("test", "tent", "");
105    ///
106    /// assert_eq!(changeset.diffs, vec![
107    ///     Difference::Same("te".to_string()),
108    ///     Difference::Rem("s".to_string()),
109    ///     Difference::Add("n".to_string()),
110    ///     Difference::Same("t".to_string())
111    /// ]);
112    /// ```
113    #[must_use]
114    pub fn new(orig: &str, edit: &str, split: &str) -> Changeset {
115        let (dist, common) = lcs(orig, edit, split);
116        Changeset {
117            diffs: merge(orig, edit, &common, split),
118            split: split.to_string(),
119            distance: dist,
120        }
121    }
122
123    /// Creates a `Changeset` with multiple possible splits.
124    /// The first string is assumed to be the "original", the second to be an
125    /// edited version of the first. The third parameter specifies how to split
126    /// the input strings, leading to a more or less exact comparison.
127    ///
128    /// Outputs the edit distance (how much the two strings differ), original string splits, edit string splits, a "changeset", that is
129    /// a `Vec` containing `Difference`s.
130    ///
131    /// Obs: Splits are included inside the `Difference` vector, as it is the only way to correctly rebuild strings, which differs from
132    /// `Changeset::new` that all spaces are filled by a single split.
133    ///
134    /// # Examples
135    ///
136    /// ```
137    /// use difference_rs::{Changeset, Difference};
138    ///
139    /// let changeset = Changeset::new_multi(
140    ///    "https://localhost:8080/path?query=value",
141    ///    "https://myapi.com/api/path?query=asset",
142    ///    &["://", "/", "?", "="],
143    /// );
144    ///
145    /// assert_eq!(changeset.diffs, vec![
146    ///     Difference::Same("https://".to_string()),
147    ///     Difference::Rem("localhost:8080/".to_string()),
148    ///     Difference::Add("myapi.com/api/".to_string()),
149    ///     Difference::Same("path?query=".to_string()),
150    ///     Difference::Rem("value".to_string()),
151    ///     Difference::Add("asset".to_string()),
152    /// ]);
153    /// ```
154    #[must_use]
155    pub fn new_multi(orig: &str, edit: &str, splits: &[&str]) -> ChangesetMulti {
156        let matched_splits = splits
157            .iter()
158            .flat_map(|split| orig.match_indices(*split))
159            .map(|(k, v)| (k, v.to_string()))
160            .collect::<Vec<(usize, String)>>();
161        let edit_splits = splits
162            .iter()
163            .flat_map(|split| edit.match_indices(*split))
164            .map(|(k, v)| (k, v.to_string()))
165            .collect::<Vec<(usize, String)>>();
166
167        let mut aux_orig = orig.to_string();
168        let mut aux_edit = edit.to_string();
169        let replacement = REPLACEMENT_CHARACTER.to_string();
170        for split in splits {
171            aux_orig = aux_orig.replace(split, &replacement);
172            aux_edit = aux_edit.replace(split, &replacement);
173        }
174
175        let changeset = Changeset::new(&aux_orig, &aux_edit, &replacement);
176        ChangesetMulti::from((changeset, matched_splits, edit_splits))
177    }
178}
179
/// Asserts that the edit distance between two strings equals an expected value.
///
/// Works like a diff: a [`Changeset`] is built from `$orig` and `$edit` using `$split`,
/// and its `distance` is compared with the fourth parameter, the expected edit distance
/// (e.g. `0` if you want to test for equality).
///
/// Remember that edit distance might not be equal to your understanding of difference,
/// for example the words "Rust" and "Dust" have an edit distance of 2 because two changes (a
/// removal and an addition) are required to make them look the same.
///
/// Each argument is evaluated exactly once, so expressions with side effects are safe to
/// pass.
///
/// # Panics
///
/// Panics when the computed distance differs from `$expected`. The changeset is printed
/// to stdout (through its `Display` implementation) right before the panic.
#[macro_export]
macro_rules! assert_diff {
    ($orig:expr_2021 , $edit:expr_2021, $split: expr_2021, $expected: expr_2021) => {{
        let orig = $orig;
        let edit = $edit;

        let changeset = $crate::Changeset::new(orig, edit, &($split));
        // Bind the expectation once: expanding `$expected` in both the comparison and the
        // panic message would run a side-effecting expression twice on failure.
        let expected = $expected;
        if changeset.distance != expected {
            println!("{}", changeset);
            panic!(
                "assertion failed: edit distance between {:?} and {:?} is {} and not {}, see \
                    diffset above",
                orig,
                edit,
                changeset.distance,
                expected
            )
        }
    }};
}
209
/// Line-level diff of two four-line texts in which the second and fourth lines changed.
#[test]
fn test_diff() {
    let original = "Roses are red, violets are blue,\n\
                    I wrote this library,\n\
                    just for you.\n\
                    (It's true).";

    let edited = "Roses are red, violets are blue,\n\
                  I wrote this documentation,\n\
                  just for you.\n\
                  (It's quite true).";

    // Each changed line shows up as a removal followed by an addition.
    let expected = vec![
        Difference::Same(String::from("Roses are red, violets are blue,")),
        Difference::Rem(String::from("I wrote this library,")),
        Difference::Add(String::from("I wrote this documentation,")),
        Difference::Same(String::from("just for you.")),
        Difference::Rem(String::from("(It's true).")),
        Difference::Add(String::from("(It's quite true).")),
    ];

    let Changeset {
        diffs, distance, ..
    } = Changeset::new(original, edited, "\n");

    assert_eq!(distance, 4);
    assert_eq!(diffs, expected);
}
238
/// Two texts without any line in common: the whole original is reported as one
/// removal and the whole edit as one addition.
#[test]
fn test_diff_brief() {
    let expected = vec![
        Difference::Rem(String::from("Hello\nworld")),
        Difference::Add(String::from("Ola\nmundo")),
    ];

    let diffs = Changeset::new("Hello\nworld", "Ola\nmundo", "\n").diffs;

    assert_eq!(diffs, expected);
}
254
/// The original has fewer lines than the edit: the trailing extra lines come out as
/// one grouped addition.
#[test]
fn test_diff_smaller_line_count_on_left() {
    let original = "Hello\nworld";
    let edited = "Ola\nworld\nHow is it\ngoing?";

    let expected = vec![
        Difference::Rem(String::from("Hello")),
        Difference::Add(String::from("Ola")),
        Difference::Same(String::from("world")),
        Difference::Add(String::from("How is it\ngoing?")),
    ];

    let diffs = Changeset::new(original, edited, "\n").diffs;

    assert_eq!(diffs, expected);
}
272
/// The edit has fewer lines than the original: the trailing missing lines come out as
/// one grouped removal.
#[test]
fn test_diff_smaller_line_count_on_right() {
    let original = "Hello\nworld\nWhat a \nbeautiful\nday!";
    let edited = "Ola\nworld";

    let expected = vec![
        Difference::Rem(String::from("Hello")),
        Difference::Add(String::from("Ola")),
        Difference::Same(String::from("world")),
        Difference::Rem(String::from("What a \nbeautiful\nday!")),
    ];

    let diffs = Changeset::new(original, edited, "\n").diffs;

    assert_eq!(diffs, expected);
}
290
/// Only the first line matches; the near-miss `woRLd` does not count as `world`, so
/// everything after the first line is removed and the new line is added.
#[test]
fn test_diff_similar_text_with_smaller_line_count_on_right() {
    let original = "Hello\nworld\nWhat a \nbeautiful\nday!";
    let edited = "Hello\nwoRLd";

    let expected = vec![
        Difference::Same(String::from("Hello")),
        Difference::Rem(String::from("world\nWhat a \nbeautiful\nday!")),
        Difference::Add(String::from("woRLd")),
    ];

    let diffs = Changeset::new(original, edited, "\n").diffs;

    assert_eq!(diffs, expected);
}
307
/// Two lines (`Hello` and `beautiful`) are shared; the lines between and after them
/// are reported as removals and additions around those anchors.
#[test]
fn test_diff_similar_text_with_similar_line_count() {
    let original = "Hello\nworld\nWhat a \nbeautiful\nday!";
    let edited = "Hello\nwoRLd\nbeautiful";

    let expected = vec![
        Difference::Same(String::from("Hello")),
        Difference::Rem(String::from("world\nWhat a ")),
        Difference::Add(String::from("woRLd")),
        Difference::Same(String::from("beautiful")),
        Difference::Rem(String::from("day!")),
    ];

    let diffs = Changeset::new(original, edited, "\n").diffs;

    assert_eq!(diffs, expected);
}
326
/// `assert_diff!` must panic, with the documented message, when the line-level edit
/// distance of the two texts (4) differs from the expected value (0).
#[test]
#[should_panic = r#"assertion failed: edit distance between "Roses are red, violets are blue,\nI wrote this library,\njust for you.\n(It's true)." and "Roses are red, violets are blue,\nI wrote this documentation,\njust for you.\n(It's quite true)." is 4 and not 0, see diffset above"#]
fn test_assert_diff_panic() {
    let text1 = "Roses are red, violets are blue,\n\
                 I wrote this library,\n\
                 just for you.\n\
                 (It's true).";

    let text2 = "Roses are red, violets are blue,\n\
                 I wrote this documentation,\n\
                 just for you.\n\
                 (It's quite true).";

    // Split on real line breaks. The split used to be "\n'" (stray apostrophe), which
    // never occurs in the texts, so they were compared as two single tokens instead of
    // line by line and the reported distance was 2 rather than the line-level 4.
    assert_diff!(text1, text2, "\n", 0);
}
342
/// Word-level comparison where a single word differs: `assert_diff!` accepts the
/// expected distance of 2 without panicking.
#[test]
fn test_assert_diff() {
    assert_diff!(
        "Roses are red, violets are blue",
        "Roses are green, violets are blue",
        " ",
        2
    );
}
351
/// Multi-split diff with `,` and ` ` as separators: the separators stay attached to the
/// reported pieces and their positions are recorded for both inputs.
#[test]
fn test_multi_pattern() {
    // Pairs a position with the split found there.
    let at = |index: usize, split: &str| (index, split.to_string());

    let expected = ChangesetMulti {
        diffs: vec![
            Difference::Rem(String::from("hello,")),
            Difference::Add(String::from("hellow,")),
            Difference::Same(String::from("world ")),
            Difference::Rem(String::from("now")),
            Difference::Add(String::from("later")),
        ],
        splits: vec![at(5, ","), at(11, " ")],
        edit_splits: vec![at(6, ","), at(12, " ")],
        distance: 4,
    };

    let actual = Changeset::new_multi("hello,world now", "hellow,world later", &[",", " "]);

    assert_eq!(actual, expected);
}
370
/// Multi-split diff of two URIs using the separators `://`, `/`, `?` and `=`.
#[test]
fn test_multi_uri_pattern() {
    // Pairs a position with the split found there.
    let at = |index: usize, split: &str| (index, split.to_string());

    let separators = ["://", "/", "?", "="];
    let actual = Changeset::new_multi(
        "https://localhost:8080/path?query=value",
        "https://myapi.com/api/path?query=asset",
        &separators,
    );

    let expected = ChangesetMulti {
        diffs: vec![
            Difference::Same(String::from("https://")),
            Difference::Rem(String::from("localhost:8080/")),
            Difference::Add(String::from("myapi.com/api/")),
            Difference::Same(String::from("path?query=")),
            Difference::Rem(String::from("value")),
            Difference::Add(String::from("asset")),
        ],
        // The two slashes inside `://` are also listed as matches of the `/` separator.
        splits: vec![
            at(5, "://"),
            at(6, "/"),
            at(7, "/"),
            at(22, "/"),
            at(27, "?"),
            at(33, "="),
        ],
        edit_splits: vec![
            at(5, "://"),
            at(6, "/"),
            at(7, "/"),
            at(17, "/"),
            at(21, "/"),
            at(26, "?"),
            at(32, "="),
        ],
        distance: 5,
    };

    assert_eq!(actual, expected);
}
408}