tor_consdiff/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3// @@ begin lint list maintained by maint/add_warning @@
4#![allow(renamed_and_removed_lints)] // @@REMOVE_WHEN(ci_arti_stable)
5#![allow(unknown_lints)] // @@REMOVE_WHEN(ci_arti_nightly)
6#![warn(missing_docs)]
7#![warn(noop_method_call)]
8#![warn(unreachable_pub)]
9#![warn(clippy::all)]
10#![deny(clippy::await_holding_lock)]
11#![deny(clippy::cargo_common_metadata)]
12#![deny(clippy::cast_lossless)]
13#![deny(clippy::checked_conversions)]
14#![warn(clippy::cognitive_complexity)]
15#![deny(clippy::debug_assert_with_mut_call)]
16#![deny(clippy::exhaustive_enums)]
17#![deny(clippy::exhaustive_structs)]
18#![deny(clippy::expl_impl_clone_on_copy)]
19#![deny(clippy::fallible_impl_from)]
20#![deny(clippy::implicit_clone)]
21#![deny(clippy::large_stack_arrays)]
22#![warn(clippy::manual_ok_or)]
23#![deny(clippy::missing_docs_in_private_items)]
24#![warn(clippy::needless_borrow)]
25#![warn(clippy::needless_pass_by_value)]
26#![warn(clippy::option_option)]
27#![deny(clippy::print_stderr)]
28#![deny(clippy::print_stdout)]
29#![warn(clippy::rc_buffer)]
30#![deny(clippy::ref_option_ref)]
31#![warn(clippy::semicolon_if_nothing_returned)]
32#![warn(clippy::trait_duplication_in_bounds)]
33#![deny(clippy::unchecked_time_subtraction)]
34#![deny(clippy::unnecessary_wraps)]
35#![warn(clippy::unseparated_literal_suffix)]
36#![deny(clippy::unwrap_used)]
37#![deny(clippy::mod_module_files)]
38#![allow(clippy::let_unit_value)] // This can reasonably be done for explicitness
39#![allow(clippy::uninlined_format_args)]
40#![allow(clippy::significant_drop_in_scrutinee)] // arti/-/merge_requests/588/#note_2812945
41#![allow(clippy::result_large_err)] // temporary workaround for arti#587
42#![allow(clippy::needless_raw_string_hashes)] // complained-about code is fine, often best
43#![allow(clippy::needless_lifetimes)] // See arti#1765
44#![allow(mismatched_lifetime_syntaxes)] // temporary workaround for arti#2060
45#![allow(clippy::collapsible_if)] // See arti#2342
46#![deny(clippy::unused_async)]
47//! <!-- @@ end lint list maintained by maint/add_warning @@ -->
48
49use std::fmt::{Display, Formatter, Write};
50use std::num::NonZeroUsize;
51use std::str::FromStr;
52
53mod err;
54use digest::Digest;
55pub use err::Error;
56use imara_diff::{Algorithm, Diff, Hunk, InternedInput};
57use tor_error::internal;
58use tor_netdoc::parse2::{ErrorProblem, ItemStream, KeywordRef, ParseError, ParseInput};
59
60use crate::err::GenEdDiffError;
61
62/// Result type used by this crate
63type Result<T> = std::result::Result<T, Error>;
64
65/// The keyword that identifies a directory signature line.
66// TODO: We probably want this in tor-netdoc.
67const DIRECTORY_SIGNATURE_KEYWORD: KeywordRef = KeywordRef::new_const("directory-signature");
68
69/// When hashing the signed part of the consensus, append this tail to the end.
70const CONSENSUS_SIGNED_SHA3_256_HASH_TAIL: &str = "directory-signature ";
71
72// Do not compile if we cannot safely convert a u32 into a usize.
73static_assertions::const_assert!(std::mem::size_of::<usize>() >= std::mem::size_of::<u32>());
74
75/// Generates a consensus diff.
76///
77/// This implementation is different from the one in CTor, because it uses a
78/// different algorithm, namely [`Algorithm::Myers`] from the [`imara_diff`]
79/// crate, which is more efficient than CTor in terms of runtime and about as
80/// equally efficient as CTor in output size.
81///
82/// The CTor implementation makes heavy use of the fact that the input is a
83/// valid consensus and that the routers in it are ordered.  This allows for
84/// some divide-and-conquer mechanisms and the cost of requiring more parsing.
85///
86/// Here, we only minimally parse the consensus, in order to only obtain the
87/// first `directory-signature` item and to cut everything including itself off
88/// from the input, as demanded by the specification.
89///
90/// All outputs of this function are guaranteed to work with this
91/// [`apply_diff()`] implementation as a check is performed before returning,
92/// because returning an unusable diff would be terrible.
93pub fn gen_cons_diff(base: &str, target: &str) -> Result<String> {
94    // Throw away the signatures.
95    let (base_signed, _) = split_directory_signatures(base)?;
96    let base_lines = base_signed.chars().filter(|c| *c == '\n').count() + 1;
97
98    // Compute the hashes for the header.
99    let base_signed_hash = hex::encode_upper({
100        let mut h = tor_llcrypto::d::Sha3_256::new();
101        h.update(base_signed);
102        h.update(CONSENSUS_SIGNED_SHA3_256_HASH_TAIL);
103        h.finalize()
104    });
105    let target_hash = hex::encode_upper(tor_llcrypto::d::Sha3_256::digest(target.as_bytes()));
106
107    // Compose the result with header.
108    let ed_diff = gen_ed_diff(base_signed, target).map_err(|e| match e {
109        GenEdDiffError::MissingUnixLineEnding { lno } => Error::InvalidInput(ParseError::new(
110            ErrorProblem::OtherBadDocument("line does not end with '\\n'"),
111            "consdiff",
112            "",
113            lno,
114            None,
115        )),
116        GenEdDiffError::ContainsDotLine { lno } => Error::InvalidInput(ParseError::new(
117            ErrorProblem::OtherBadDocument("contains dotline"),
118            "consdiff",
119            "",
120            lno,
121            None,
122        )),
123        GenEdDiffError::Write(_) => internal!("string write was not infallible?").into(),
124    })?;
125
126    let result = format!(
127        "network-status-diff-version 1\n\
128        hash {base_signed_hash} {target_hash}\n\
129        {base_lines},$d\n\
130        {ed_diff}"
131    );
132
133    // Ensure it is valid, refuse to emit an invalid diff.
134    let check = apply_diff(base, &result, None).map_err(|_| internal!("apply call failed"))?;
135    if check.to_string() != target {
136        Err(internal!("result does not match?"))?;
137    }
138
139    Ok(result)
140}
141
142/// Splits `input` at the first `directory-signature`.
143fn split_directory_signatures(input: &str) -> Result<(&str, &str)> {
144    let parse_input = ParseInput::new(input, "");
145    let mut items = ItemStream::new(&parse_input)?;
146
147    // Parse the consensus item by item until the first `directory-signature`.
148    loop {
149        // We only peek in order to get the proper byte offset.
150        // This is required because doing next() and breaking in the case of
151        // a `directory-signature` would then lead to `.byte_offset()` yielding
152        // the start of the second signature and not the start of the first one.
153        let item = items
154            .peek_keyword()
155            .map_err(|e| ParseError::new(e, "consdiff", "", items.lno_for_error(), None))?;
156
157        match item {
158            Some(DIRECTORY_SIGNATURE_KEYWORD) => {
159                let offset = items.byte_position();
160                return Ok((&input[..offset], &input[offset..]));
161            }
162            Some(_) => {
163                // Consume the just peeked item.
164                let _ = items.next();
165            }
166            None => {
167                // We are finished.
168                return Err(Error::InvalidInput(ParseError::new(
169                    ErrorProblem::MissingItem {
170                        keyword: DIRECTORY_SIGNATURE_KEYWORD.as_str(),
171                    },
172                    "consdiff",
173                    "",
174                    items.lno_for_error(),
175                    None,
176                )));
177            }
178        }
179    }
180}
181
182/// Generates an input agnostic ed diff.
183///
184/// This function does the general logic of [`gen_cons_diff()`] but works in a
185/// document agnostic fashion.
186fn gen_ed_diff(base: &str, target: &str) -> std::result::Result<String, GenEdDiffError> {
187    let mut result = String::new();
188
189    // We use Myers' algorithm as benchmarks have shown that it provides an
190    // equal diff size as the ctor one while keeping an acceptable performance.
191    let input = InternedInput::new(base, target);
192    let mut diff = Diff::compute(Algorithm::Myers, &input);
193    diff.postprocess_lines(&input);
194
195    // Iterate through every a hunk, with a hunk being a block of changes.
196    let hunks = diff.hunks().collect::<Vec<_>>();
197    for hunk in hunks.into_iter().rev() {
198        // Format the header.
199        let hunk_type = HunkType::determine(&hunk);
200        match hunk_type {
201            // No need to do +1 because append is AFTER.
202            HunkType::Append => writeln!(result, "{}{hunk_type}", hunk.before.start)?,
203            HunkType::Delete | HunkType::Change => {
204                if hunk.before.start + 1 == hunk.before.end {
205                    // +1 because 1-indexed.
206                    writeln!(result, "{}{hunk_type}", hunk.before.start + 1)?;
207                } else {
208                    // +1 because 1-indexed; no need to do +1 on end because
209                    // the range is inclusive.
210                    writeln!(
211                        result,
212                        "{},{}{hunk_type}",
213                        hunk.before.start + 1,
214                        hunk.before.end
215                    )?;
216                }
217            }
218        }
219
220        // Format the body.
221        match hunk_type {
222            HunkType::Append | HunkType::Change => {
223                let range = (hunk.after.start)..(hunk.after.end);
224                let tlines = range
225                    .map(|idx| {
226                        let idx = usize::try_from(idx).expect("32-bit static assertion violated?");
227                        input.interner[input.after[idx]]
228                    })
229                    .collect::<Vec<_>>();
230
231                for (lno, line) in tlines.iter().copied().enumerate() {
232                    // Check that all lines end with a Unix line ending.
233                    if line.ends_with("\r\n") || !line.ends_with("\n") {
234                        // +1 because 1-indexed.
235                        return Err(GenEdDiffError::MissingUnixLineEnding { lno: lno + 1 });
236                    }
237
238                    // Check for lines consisting of a single dot plus trailing
239                    // whitespace characters.  No need to bother about "\r\n",
240                    // because we checked that one above.  Although technically
241                    // lines such as `. \n` are possible and understood
242                    // as part of ed diffs, they are not legal in tor netdocs, and
243                    // we want to be more defensive here for now; if it becomes a
244                    // problem, we may remove it later.
245                    if line.trim_end() == "." {
246                        // +1 because 1-indexed.
247                        return Err(GenEdDiffError::ContainsDotLine { lno: lno + 1 });
248                    }
249
250                    // All lines are newline terminated, no need to use writeln!
251                    write!(result, "{line}")?;
252                }
253
254                // Write the terminating dot.
255                writeln!(result, ".")?;
256            }
257            HunkType::Delete => {}
258        }
259    }
260
261    Ok(result)
262}
263
264/// The operational type of the hunk.
265#[derive(Clone, Copy, Debug, derive_more::Display)]
266enum HunkType {
267    /// This is a pure appending.
268    #[display("a")]
269    Append,
270    /// This is a pure deletion.
271    #[display("d")]
272    Delete,
273    /// This is change with potential additions and deletions.
274    #[display("c")]
275    Change,
276}
277
278impl HunkType {
279    /// Determines the type of the hunk.
280    fn determine(hunk: &Hunk) -> Self {
281        if hunk.is_pure_insertion() {
282            Self::Append
283        } else if hunk.is_pure_removal() {
284            Self::Delete
285        } else {
286            Self::Change
287        }
288    }
289}
290
291/// Return true if `s` looks more like a consensus diff than some other kind
292/// of document.
293pub fn looks_like_diff(s: &str) -> bool {
294    s.starts_with("network-status-diff-version")
295}
296
297/// Apply a given diff to an input text, and return the result from applying
298/// that diff.
299///
300/// This is a slow version, for testing and correctness checking.  It uses
301/// an O(n) operation to apply diffs, and therefore runs in O(n^2) time.
302#[cfg(any(test, feature = "slow-diff-apply"))]
303pub fn apply_diff_trivial<'a>(input: &'a str, diff: &'a str) -> Result<DiffResult<'a>> {
304    let mut diff_lines = diff.lines();
305    let (_, d2) = parse_diff_header(&mut diff_lines)?;
306
307    let mut diffable = DiffResult::from_str(input, d2);
308
309    for command in DiffCommandIter::new(diff_lines) {
310        command?.apply_to(&mut diffable)?;
311    }
312
313    Ok(diffable)
314}
315
316/// Apply a given diff to an input text, and return the result from applying
317/// that diff.
318///
319/// If `check_digest_in` is provided, require the diff to say that it
320/// applies to a document with the provided digest.
321pub fn apply_diff<'a>(
322    input: &'a str,
323    diff: &'a str,
324    check_digest_in: Option<[u8; 32]>,
325) -> Result<DiffResult<'a>> {
326    let mut input = DiffResult::from_str(input, [0; 32]);
327
328    let mut diff_lines = diff.lines();
329    let (d1, d2) = parse_diff_header(&mut diff_lines)?;
330    if let Some(d_want) = check_digest_in {
331        if d1 != d_want {
332            return Err(Error::CantApply("listed digest does not match document"));
333        }
334    }
335
336    let mut output = DiffResult::new(d2);
337
338    for command in DiffCommandIter::new(diff_lines) {
339        command?.apply_transformation(&mut input, &mut output)?;
340    }
341
342    output.push_reversed(&input.lines[..]);
343
344    output.lines.reverse();
345    Ok(output)
346}
347
348/// Given a line iterator, check to make sure the first two lines are
349/// a valid diff header as specified in dir-spec.txt.
350fn parse_diff_header<'a, I>(iter: &mut I) -> Result<([u8; 32], [u8; 32])>
351where
352    I: Iterator<Item = &'a str>,
353{
354    let line1 = iter.next();
355    if line1 != Some("network-status-diff-version 1") {
356        return Err(Error::BadDiff("unrecognized or missing header"));
357    }
358    let line2 = iter.next().ok_or(Error::BadDiff("header truncated"))?;
359    if !line2.starts_with("hash ") {
360        return Err(Error::BadDiff("missing 'hash' line"));
361    }
362    let elts: Vec<_> = line2.split_ascii_whitespace().collect();
363    if elts.len() != 3 {
364        return Err(Error::BadDiff("invalid 'hash' line"));
365    }
366    let d1 = hex::decode(elts[1])?;
367    let d2 = hex::decode(elts[2])?;
368    match (d1.try_into(), d2.try_into()) {
369        (Ok(a), Ok(b)) => Ok((a, b)),
370        _ => Err(Error::BadDiff("wrong digest lengths on 'hash' line")),
371    }
372}
373
374/// A command that can appear in a diff.  Each command tells us to
375/// remove zero or more lines, and insert zero or more lines in their
376/// place.
377///
378/// Commands refer to lines by 1-indexed line number.
379#[derive(Clone, Debug)]
380enum DiffCommand<'a> {
381    /// Remove the lines from low through high, inclusive.
382    Delete {
383        /// The first line to remove
384        low: usize,
385        /// The last line to remove
386        high: usize,
387    },
388    /// Remove the lines from low through the end of the file, inclusive.
389    DeleteToEnd {
390        /// The first line to remove
391        low: usize,
392    },
393    /// Replace the lines from low through high, inclusive, with the
394    /// lines in 'lines'.
395    Replace {
396        /// The first line to replace
397        low: usize,
398        /// The last line to replace
399        high: usize,
400        /// The text to insert instead
401        lines: Vec<&'a str>,
402    },
403    /// Insert the provided 'lines' after the line with index 'pos'.
404    Insert {
405        /// The position after which to insert the text
406        pos: usize,
407        /// The text to insert
408        lines: Vec<&'a str>,
409    },
410}
411
412/// The result of applying one or more diff commands to an input string.
413///
414/// It refers to lines from the diff and the input by reference, to
415/// avoid copying.
416#[derive(Clone, Debug)]
417pub struct DiffResult<'a> {
418    /// An expected digest of the output, after it has been assembled.
419    d_post: [u8; 32],
420    /// The lines in the output.
421    lines: Vec<&'a str>,
422}
423
424/// A possible value for the end of a range.  It can be either a line number,
425/// or a dollar sign indicating "end of file".
426#[derive(Clone, Copy, Debug)]
427enum RangeEnd {
428    /// A line number in the file.
429    Num(NonZeroUsize),
430    /// A dollar sign, indicating "end of file" in a delete command.
431    DollarSign,
432}
433
434impl FromStr for RangeEnd {
435    type Err = Error;
436    fn from_str(s: &str) -> Result<RangeEnd> {
437        if s == "$" {
438            Ok(RangeEnd::DollarSign)
439        } else {
440            let v: NonZeroUsize = s.parse()?;
441            if v.get() == usize::MAX {
442                return Err(Error::BadDiff("range cannot end at usize::MAX"));
443            }
444            Ok(RangeEnd::Num(v))
445        }
446    }
447}
448
449impl<'a> DiffCommand<'a> {
450    /// Transform 'target' according to the this command.
451    ///
452    /// Because DiffResult internally uses a vector of line, this
453    /// implementation is potentially O(n) in the size of the input.
454    #[cfg(any(test, feature = "slow-diff-apply"))]
455    fn apply_to(&self, target: &mut DiffResult<'a>) -> Result<()> {
456        match self {
457            Self::Delete { low, high } => {
458                target.remove_lines(*low, *high)?;
459            }
460            Self::DeleteToEnd { low } => {
461                target.remove_lines(*low, target.lines.len())?;
462            }
463            Self::Replace { low, high, lines } => {
464                target.remove_lines(*low, *high)?;
465                target.insert_at(*low, lines)?;
466            }
467            Self::Insert { pos, lines } => {
468                // This '+1' seems off, but it's what the spec says. I wonder
469                // if the spec is wrong.
470                target.insert_at(*pos + 1, lines)?;
471            }
472        };
473        Ok(())
474    }
475
476    /// Apply this command to 'input', moving lines into 'output'.
477    ///
478    /// This is a more efficient algorithm, but it requires that the
479    /// diff commands are sorted in reverse order by line
480    /// number. (Fortunately, the Tor ed diff format guarantees this.)
481    ///
482    /// Before calling this method, input and output must contain the
483    /// results of having applied the previous command in the diff.
484    /// (When no commands have been applied, input starts out as the
485    /// original text, and output starts out empty.)
486    ///
487    /// This method applies the command by copying unaffected lines
488    /// from the _end_ of input into output, adding any lines inserted
489    /// by this command, and finally deleting any affected lines from
490    /// input.
491    ///
492    /// We build the `output` value in reverse order, and then put it
493    /// back to normal before giving it to the user.
494    fn apply_transformation(
495        &self,
496        input: &mut DiffResult<'a>,
497        output: &mut DiffResult<'a>,
498    ) -> Result<()> {
499        if let Some(succ) = self.following_lines() {
500            if let Some(subslice) = input.lines.get(succ - 1..) {
501                // Lines from `succ` onwards are unaffected.  Copy them.
502                output.push_reversed(subslice);
503            } else {
504                // Oops, dubious line number.
505                return Err(Error::CantApply(
506                    "ending line number didn't correspond to document",
507                ));
508            }
509        }
510
511        if let Some(lines) = self.lines() {
512            // These are the lines we're inserting.
513            output.push_reversed(lines);
514        }
515
516        let remove = self.first_removed_line();
517        if remove == 0 || (!self.is_insert() && remove > input.lines.len()) {
518            return Err(Error::CantApply(
519                "starting line number didn't correspond to document",
520            ));
521        }
522        input.lines.truncate(remove - 1);
523
524        Ok(())
525    }
526
527    /// Return the lines that we should add to the output
528    fn lines(&self) -> Option<&[&'a str]> {
529        match self {
530            Self::Replace { lines, .. } | Self::Insert { lines, .. } => Some(lines.as_slice()),
531            _ => None,
532        }
533    }
534
535    /// Return a mutable reference to the vector of lines we should
536    /// add to the output.
537    fn linebuf_mut(&mut self) -> Option<&mut Vec<&'a str>> {
538        match self {
539            Self::Replace { lines, .. } | Self::Insert { lines, .. } => Some(lines),
540            _ => None,
541        }
542    }
543
544    /// Return the (1-indexed) line number of the first line in the
545    /// input that comes _after_ this command, and is not affected by it.
546    ///
547    /// We use this line number to know which lines we should copy.
548    fn following_lines(&self) -> Option<usize> {
549        match self {
550            Self::Delete { high, .. } | Self::Replace { high, .. } => Some(high + 1),
551            Self::DeleteToEnd { .. } => None,
552            Self::Insert { pos, .. } => Some(pos + 1),
553        }
554    }
555
556    /// Return the (1-indexed) line number of the first line that we
557    /// should clear from the input when processing this command.
558    ///
559    /// This can be the same as following_lines(), if we shouldn't
560    /// actually remove any lines.
561    fn first_removed_line(&self) -> usize {
562        match self {
563            Self::Delete { low, .. } => *low,
564            Self::DeleteToEnd { low } => *low,
565            Self::Replace { low, .. } => *low,
566            Self::Insert { pos, .. } => *pos + 1,
567        }
568    }
569
570    /// Return true if this is an Insert command.
571    fn is_insert(&self) -> bool {
572        matches!(self, Self::Insert { .. })
573    }
574
575    /// Extract a single command from a line iterator that yields lines
576    /// of the diffs.  Return None if we're at the end of the iterator.
577    fn from_line_iterator<I>(iter: &mut I) -> Result<Option<Self>>
578    where
579        I: Iterator<Item = &'a str>,
580    {
581        let command = match iter.next() {
582            Some(s) => s,
583            None => return Ok(None),
584        };
585
586        // `command` can be of these forms: `Rc`, `Rd`, `N,$d`, and `Na`,
587        // where R is a range of form `N,N`, and where N is a line number.
588
589        if command.len() < 2 || !command.is_ascii() {
590            return Err(Error::BadDiff("command too short"));
591        }
592
593        let (range, command) = command.split_at(command.len() - 1);
594        let (low, high) = if let Some(comma_pos) = range.find(',') {
595            (
596                range[..comma_pos].parse::<usize>()?,
597                Some(range[comma_pos + 1..].parse::<RangeEnd>()?),
598            )
599        } else {
600            (range.parse::<usize>()?, None)
601        };
602
603        if low == usize::MAX {
604            return Err(Error::BadDiff("range cannot begin at usize::MAX"));
605        }
606
607        match (low, high) {
608            (lo, Some(RangeEnd::Num(hi))) if lo > hi.into() => {
609                return Err(Error::BadDiff("mis-ordered lines in range"));
610            }
611            (_, _) => (),
612        }
613
614        let mut cmd = match (command, low, high) {
615            ("d", low, None) => Self::Delete { low, high: low },
616            ("d", low, Some(RangeEnd::Num(high))) => Self::Delete {
617                low,
618                high: high.into(),
619            },
620            ("d", low, Some(RangeEnd::DollarSign)) => Self::DeleteToEnd { low },
621            ("c", low, None) => Self::Replace {
622                low,
623                high: low,
624                lines: Vec::new(),
625            },
626            ("c", low, Some(RangeEnd::Num(high))) => Self::Replace {
627                low,
628                high: high.into(),
629                lines: Vec::new(),
630            },
631            ("a", low, None) => Self::Insert {
632                pos: low,
633                lines: Vec::new(),
634            },
635            (_, _, _) => return Err(Error::BadDiff("can't parse command line")),
636        };
637
638        if let Some(ref mut linebuf) = cmd.linebuf_mut() {
639            // The 'c' and 'a' commands take a series of lines followed by a
640            // line containing a period.
641            loop {
642                match iter.next() {
643                    None => return Err(Error::BadDiff("unterminated block to insert")),
644                    Some(".") => break,
645                    Some(line) => linebuf.push(line),
646                }
647            }
648        }
649
650        Ok(Some(cmd))
651    }
652}
653
654/// Iterator that wraps a line iterator and returns a sequence of
655/// `Result<DiffCommand>`.
656///
657/// This iterator forces the commands to affect the file in reverse order,
658/// so that we can use the O(n) algorithm for applying these diffs.
659struct DiffCommandIter<'a, I>
660where
661    I: Iterator<Item = &'a str>,
662{
663    /// The underlying iterator.
664    iter: I,
665
666    /// The 'first removed line' of the last-parsed command; used to ensure
667    /// that commands appear in reverse order.
668    last_cmd_first_removed: Option<usize>,
669}
670
671impl<'a, I> DiffCommandIter<'a, I>
672where
673    I: Iterator<Item = &'a str>,
674{
675    /// Construct a new DiffCommandIter wrapping `iter`.
676    fn new(iter: I) -> Self {
677        DiffCommandIter {
678            iter,
679            last_cmd_first_removed: None,
680        }
681    }
682}
683
684impl<'a, I> Iterator for DiffCommandIter<'a, I>
685where
686    I: Iterator<Item = &'a str>,
687{
688    type Item = Result<DiffCommand<'a>>;
689    fn next(&mut self) -> Option<Result<DiffCommand<'a>>> {
690        match DiffCommand::from_line_iterator(&mut self.iter) {
691            Err(e) => Some(Err(e)),
692            Ok(None) => None,
693            Ok(Some(c)) => match (self.last_cmd_first_removed, c.following_lines()) {
694                (Some(_), None) => Some(Err(Error::BadDiff("misordered commands"))),
695                (Some(a), Some(b)) if a < b => Some(Err(Error::BadDiff("misordered commands"))),
696                (_, _) => {
697                    self.last_cmd_first_removed = Some(c.first_removed_line());
698                    Some(Ok(c))
699                }
700            },
701        }
702    }
703}
704
705impl<'a> DiffResult<'a> {
706    /// Construct a new DiffResult containing the provided string
707    /// split into lines, and an expected post-transformation digest.
708    fn from_str(s: &'a str, d_post: [u8; 32]) -> Self {
709        // As per the [netdoc syntax], newlines should be discarded and ignored.
710        //
711        // [netdoc syntax]: https://spec.torproject.org/dir-spec/netdoc.html#netdoc-syntax
712        let lines: Vec<_> = s.lines().collect();
713
714        DiffResult { d_post, lines }
715    }
716
717    /// Return a new empty DiffResult with an expected
718    /// post-transformation digests
719    fn new(d_post: [u8; 32]) -> Self {
720        DiffResult {
721            d_post,
722            lines: Vec::new(),
723        }
724    }
725
726    /// Put every member of `lines` at the end of this DiffResult, in
727    /// reverse order.
728    fn push_reversed(&mut self, lines: &[&'a str]) {
729        self.lines.extend(lines.iter().rev());
730    }
731
732    /// Remove the 1-indexed lines from `first` through `last` inclusive.
733    ///
734    /// This has to move elements around within the vector, and so it
735    /// is potentially O(n) in its length.
736    #[cfg(any(test, feature = "slow-diff-apply"))]
737    fn remove_lines(&mut self, first: usize, last: usize) -> Result<()> {
738        if first > self.lines.len() || last > self.lines.len() || first == 0 || last == 0 {
739            Err(Error::CantApply("line out of range"))
740        } else {
741            let n_to_remove = last - first + 1;
742            if last != self.lines.len() {
743                self.lines[..].copy_within((last).., first - 1);
744            }
745            self.lines.truncate(self.lines.len() - n_to_remove);
746            Ok(())
747        }
748    }
749
750    /// Insert the provided `lines` so that they appear at 1-indexed
751    /// position `pos`.
752    ///
753    /// This has to move elements around within the vector, and so it
754    /// is potentially O(n) in its length.
755    #[cfg(any(test, feature = "slow-diff-apply"))]
756    fn insert_at(&mut self, pos: usize, lines: &[&'a str]) -> Result<()> {
757        if pos > self.lines.len() + 1 || pos == 0 {
758            Err(Error::CantApply("position out of range"))
759        } else {
760            let orig_len = self.lines.len();
761            self.lines.resize(self.lines.len() + lines.len(), "");
762            self.lines
763                .copy_within(pos - 1..orig_len, pos - 1 + lines.len());
764            self.lines[(pos - 1)..(pos + lines.len() - 1)].copy_from_slice(lines);
765            Ok(())
766        }
767    }
768
769    /// See whether the output of this diff matches the target digest.
770    ///
771    /// If not, return an error.
772    pub fn check_digest(&self) -> Result<()> {
773        use digest::Digest;
774        use tor_llcrypto::d::Sha3_256;
775        let mut d = Sha3_256::new();
776        for line in &self.lines {
777            d.update(line.as_bytes());
778            d.update(b"\n");
779        }
780        if d.finalize() == self.d_post.into() {
781            Ok(())
782        } else {
783            Err(Error::CantApply("Wrong digest after applying diff"))
784        }
785    }
786}
787
788impl<'a> Display for DiffResult<'a> {
789    fn fmt(&self, f: &mut Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
790        for elt in &self.lines {
791            writeln!(f, "{}", elt)?;
792        }
793        Ok(())
794    }
795}
796
797#[cfg(test)]
798mod test {
799    // @@ begin test lint list maintained by maint/add_warning @@
800    #![allow(clippy::bool_assert_comparison)]
801    #![allow(clippy::clone_on_copy)]
802    #![allow(clippy::dbg_macro)]
803    #![allow(clippy::mixed_attributes_style)]
804    #![allow(clippy::print_stderr)]
805    #![allow(clippy::print_stdout)]
806    #![allow(clippy::single_char_pattern)]
807    #![allow(clippy::unwrap_used)]
808    #![allow(clippy::unchecked_time_subtraction)]
809    #![allow(clippy::useless_vec)]
810    #![allow(clippy::needless_pass_by_value)]
811    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
812
813    use rand::seq::IndexedRandom;
814    use tor_basic_utils::test_rng::testing_rng;
815
816    use super::*;
817
818    #[test]
819    fn remove() -> Result<()> {
820        let example = DiffResult::from_str("1\n2\n3\n4\n5\n6\n7\n8\n9\n", [0; 32]);
821
822        let mut d = example.clone();
823        d.remove_lines(5, 7)?;
824        assert_eq!(d.to_string(), "1\n2\n3\n4\n8\n9\n");
825
826        let mut d = example.clone();
827        d.remove_lines(1, 9)?;
828        assert_eq!(d.to_string(), "");
829
830        let mut d = example.clone();
831        d.remove_lines(1, 1)?;
832        assert_eq!(d.to_string(), "2\n3\n4\n5\n6\n7\n8\n9\n");
833
834        let mut d = example.clone();
835        d.remove_lines(6, 9)?;
836        assert_eq!(d.to_string(), "1\n2\n3\n4\n5\n");
837
838        let mut d = example.clone();
839        assert!(d.remove_lines(6, 10).is_err());
840        assert!(d.remove_lines(0, 1).is_err());
841        assert_eq!(d.to_string(), "1\n2\n3\n4\n5\n6\n7\n8\n9\n");
842
843        Ok(())
844    }
845
846    #[test]
847    fn insert() -> Result<()> {
848        let example = DiffResult::from_str("1\n2\n3\n4\n5\n", [0; 32]);
849        let mut d = example.clone();
850        d.insert_at(3, &["hello", "world"])?;
851        assert_eq!(d.to_string(), "1\n2\nhello\nworld\n3\n4\n5\n");
852
853        let mut d = example.clone();
854        d.insert_at(6, &["hello", "world"])?;
855        assert_eq!(d.to_string(), "1\n2\n3\n4\n5\nhello\nworld\n");
856
857        let mut d = example.clone();
858        assert!(d.insert_at(0, &["hello", "world"]).is_err());
859        assert!(d.insert_at(7, &["hello", "world"]).is_err());
860        Ok(())
861    }
862
863    #[test]
864    fn push_reversed() {
865        let mut d = DiffResult::new([0; 32]);
866        d.push_reversed(&["7", "8", "9"]);
867        assert_eq!(d.to_string(), "9\n8\n7\n");
868        d.push_reversed(&["world", "hello", ""]);
869        assert_eq!(d.to_string(), "9\n8\n7\n\nhello\nworld\n");
870    }
871
872    #[test]
873    fn apply_command_simple() {
874        let example = DiffResult::from_str("a\nb\nc\nd\ne\nf\n", [0; 32]);
875
876        let mut d = example.clone();
877        assert_eq!(d.to_string(), "a\nb\nc\nd\ne\nf\n".to_string());
878        assert!(DiffCommand::DeleteToEnd { low: 5 }.apply_to(&mut d).is_ok());
879        assert_eq!(d.to_string(), "a\nb\nc\nd\n".to_string());
880
881        let mut d = example.clone();
882        assert!(
883            DiffCommand::Delete { low: 3, high: 5 }
884                .apply_to(&mut d)
885                .is_ok()
886        );
887        assert_eq!(d.to_string(), "a\nb\nf\n".to_string());
888
889        let mut d = example.clone();
890        assert!(
891            DiffCommand::Replace {
892                low: 3,
893                high: 5,
894                lines: vec!["hello", "world"]
895            }
896            .apply_to(&mut d)
897            .is_ok()
898        );
899        assert_eq!(d.to_string(), "a\nb\nhello\nworld\nf\n".to_string());
900
901        let mut d = example.clone();
902        assert!(
903            DiffCommand::Insert {
904                pos: 3,
905                lines: vec!["hello", "world"]
906            }
907            .apply_to(&mut d)
908            .is_ok()
909        );
910        assert_eq!(
911            d.to_string(),
912            "a\nb\nc\nhello\nworld\nd\ne\nf\n".to_string()
913        );
914    }
915
916    #[test]
917    fn parse_command() -> Result<()> {
918        fn parse(s: &str) -> Result<DiffCommand<'_>> {
919            let mut iter = s.lines();
920            let cmd = DiffCommand::from_line_iterator(&mut iter)?;
921            let cmd2 = DiffCommand::from_line_iterator(&mut iter)?;
922            if cmd2.is_some() {
923                panic!("Unexpected second command");
924            }
925            Ok(cmd.unwrap())
926        }
927
928        fn parse_err(s: &str) {
929            let mut iter = s.lines();
930            let cmd = DiffCommand::from_line_iterator(&mut iter);
931            assert!(matches!(cmd, Err(Error::BadDiff(_))));
932        }
933
934        let p = parse("3,8d\n")?;
935        assert!(matches!(p, DiffCommand::Delete { low: 3, high: 8 }));
936        let p = parse("3d\n")?;
937        assert!(matches!(p, DiffCommand::Delete { low: 3, high: 3 }));
938        let p = parse("100,$d\n")?;
939        assert!(matches!(p, DiffCommand::DeleteToEnd { low: 100 }));
940
941        let p = parse("30,40c\nHello\nWorld\n.\n")?;
942        assert!(matches!(
943            p,
944            DiffCommand::Replace {
945                low: 30,
946                high: 40,
947                ..
948            }
949        ));
950        assert_eq!(p.lines(), Some(&["Hello", "World"][..]));
951        let p = parse("30c\nHello\nWorld\n.\n")?;
952        assert!(matches!(
953            p,
954            DiffCommand::Replace {
955                low: 30,
956                high: 30,
957                ..
958            }
959        ));
960        assert_eq!(p.lines(), Some(&["Hello", "World"][..]));
961
962        let p = parse("999a\nHello\nWorld\n.\n")?;
963        assert!(matches!(p, DiffCommand::Insert { pos: 999, .. }));
964        assert_eq!(p.lines(), Some(&["Hello", "World"][..]));
965        let p = parse("0a\nHello\nWorld\n.\n")?;
966        assert!(matches!(p, DiffCommand::Insert { pos: 0, .. }));
967        assert_eq!(p.lines(), Some(&["Hello", "World"][..]));
968
969        parse_err("hello world");
970        parse_err("\n\n");
971        parse_err("$,5d");
972        parse_err("5,6,8d");
973        parse_err("8,5d");
974        parse_err("6");
975        parse_err("d");
976        parse_err("-10d");
977        parse_err("4,$c\na\n.");
978        parse_err("foo");
979        parse_err("5,10p");
980        parse_err("18446744073709551615a");
981        parse_err("1,18446744073709551615d");
982
983        Ok(())
984    }
985
986    #[test]
987    fn apply_transformation() -> Result<()> {
988        let example = DiffResult::from_str("1\n2\n3\n4\n5\n6\n7\n8\n9\n", [0; 32]);
989        let empty = DiffResult::new([1; 32]);
990
991        let mut inp = example.clone();
992        let mut out = empty.clone();
993        DiffCommand::DeleteToEnd { low: 5 }.apply_transformation(&mut inp, &mut out)?;
994        assert_eq!(inp.to_string(), "1\n2\n3\n4\n");
995        assert_eq!(out.to_string(), "");
996
997        let mut inp = example.clone();
998        let mut out = empty.clone();
999        DiffCommand::DeleteToEnd { low: 9 }.apply_transformation(&mut inp, &mut out)?;
1000        assert_eq!(inp.to_string(), "1\n2\n3\n4\n5\n6\n7\n8\n");
1001        assert_eq!(out.to_string(), "");
1002
1003        let mut inp = example.clone();
1004        let mut out = empty.clone();
1005        DiffCommand::Delete { low: 3, high: 5 }.apply_transformation(&mut inp, &mut out)?;
1006        assert_eq!(inp.to_string(), "1\n2\n");
1007        assert_eq!(out.to_string(), "9\n8\n7\n6\n");
1008
1009        let mut inp = example.clone();
1010        let mut out = empty.clone();
1011        DiffCommand::Replace {
1012            low: 5,
1013            high: 6,
1014            lines: vec!["oh hey", "there"],
1015        }
1016        .apply_transformation(&mut inp, &mut out)?;
1017        assert_eq!(inp.to_string(), "1\n2\n3\n4\n");
1018        assert_eq!(out.to_string(), "9\n8\n7\nthere\noh hey\n");
1019
1020        let mut inp = example.clone();
1021        let mut out = empty.clone();
1022        DiffCommand::Insert {
1023            pos: 3,
1024            lines: vec!["oh hey", "there"],
1025        }
1026        .apply_transformation(&mut inp, &mut out)?;
1027        assert_eq!(inp.to_string(), "1\n2\n3\n");
1028        assert_eq!(out.to_string(), "9\n8\n7\n6\n5\n4\nthere\noh hey\n");
1029        DiffCommand::Insert {
1030            pos: 0,
1031            lines: vec!["boom!"],
1032        }
1033        .apply_transformation(&mut inp, &mut out)?;
1034        assert_eq!(inp.to_string(), "");
1035        assert_eq!(
1036            out.to_string(),
1037            "9\n8\n7\n6\n5\n4\nthere\noh hey\n3\n2\n1\nboom!\n"
1038        );
1039
1040        let mut inp = example.clone();
1041        let mut out = empty.clone();
1042        let r = DiffCommand::Delete {
1043            low: 100,
1044            high: 200,
1045        }
1046        .apply_transformation(&mut inp, &mut out);
1047        assert!(r.is_err());
1048        let r = DiffCommand::Delete { low: 5, high: 200 }.apply_transformation(&mut inp, &mut out);
1049        assert!(r.is_err());
1050        let r = DiffCommand::Delete { low: 0, high: 1 }.apply_transformation(&mut inp, &mut out);
1051        assert!(r.is_err());
1052        let r = DiffCommand::DeleteToEnd { low: 10 }.apply_transformation(&mut inp, &mut out);
1053        assert!(r.is_err());
1054        Ok(())
1055    }
1056
1057    #[test]
1058    fn header() -> Result<()> {
1059        fn header_from(s: &str) -> Result<([u8; 32], [u8; 32])> {
1060            let mut iter = s.lines();
1061            parse_diff_header(&mut iter)
1062        }
1063
1064        let (a,b) = header_from(
1065            "network-status-diff-version 1
1066hash B03DA3ACA1D3C1D083E3FF97873002416EBD81A058B406D5C5946EAB53A79663 F6789F35B6B3BA58BB23D29E53A8ED6CBB995543DBE075DD5671481C4BA677FB"
1067        )?;
1068
1069        assert_eq!(
1070            &a[..],
1071            hex::decode("B03DA3ACA1D3C1D083E3FF97873002416EBD81A058B406D5C5946EAB53A79663")?
1072        );
1073        assert_eq!(
1074            &b[..],
1075            hex::decode("F6789F35B6B3BA58BB23D29E53A8ED6CBB995543DBE075DD5671481C4BA677FB")?
1076        );
1077
1078        assert!(header_from("network-status-diff-version 2\n").is_err());
1079        assert!(header_from("").is_err());
1080        assert!(header_from("5,$d\n1,2d\n").is_err());
1081        assert!(header_from("network-status-diff-version 1\n").is_err());
1082        assert!(
1083            header_from(
1084                "network-status-diff-version 1
1085hash x y
10865,5d"
1087            )
1088            .is_err()
1089        );
1090        assert!(
1091            header_from(
1092                "network-status-diff-version 1
1093hash x y
10945,5d"
1095            )
1096            .is_err()
1097        );
1098        assert!(
1099            header_from(
1100                "network-status-diff-version 1
1101hash AA BB
11025,5d"
1103            )
1104            .is_err()
1105        );
1106        assert!(
1107            header_from(
1108                "network-status-diff-version 1
1109oh hello there
11105,5d"
1111            )
1112            .is_err()
1113        );
1114        assert!(header_from("network-status-diff-version 1
1115hash B03DA3ACA1D3C1D083E3FF97873002416EBD81A058B406D5C5946EAB53A79663 F6789F35B6B3BA58BB23D29E53A8ED6CBB995543DBE075DD5671481C4BA677FB extra").is_err());
1116
1117        Ok(())
1118    }
1119
1120    #[test]
1121    fn apply_simple() {
1122        let pre = include_str!("../testdata/consensus1.txt");
1123        let diff = include_str!("../testdata/diff1.txt");
1124        let post = include_str!("../testdata/consensus2.txt");
1125
1126        let result = apply_diff_trivial(pre, diff).unwrap();
1127        assert!(result.check_digest().is_ok());
1128        assert_eq!(result.to_string(), post);
1129    }
1130
1131    #[test]
1132    fn sort_order() -> Result<()> {
1133        fn cmds(s: &str) -> Result<Vec<DiffCommand<'_>>> {
1134            let mut out = Vec::new();
1135            for cmd in DiffCommandIter::new(s.lines()) {
1136                out.push(cmd?);
1137            }
1138            Ok(out)
1139        }
1140
1141        let _ = cmds("6,9d\n5,5d\n")?;
1142        assert!(cmds("5,5d\n6,9d\n").is_err());
1143        assert!(cmds("5,5d\n6,6d\n").is_err());
1144        assert!(cmds("5,5d\n5,6d\n").is_err());
1145
1146        Ok(())
1147    }
1148
1149    /// Test for cons diff using a random word generator.
1150    #[test]
1151    fn cons_diff() {
1152        // cat /usr/share/dict/words | sort -R | head -n 20 | sed 's/^/"/g' | sed 's/$/",/g'
1153        const WORDS: &[&str] = &[
1154            "citole",
1155            "aflow",
1156            "plowfoot",
1157            "coom",
1158            "retape",
1159            "perish",
1160            "overstifle",
1161            "ramshackle",
1162            "Romeo",
1163            "alme",
1164            "expressivity",
1165            "Kieffer",
1166            "tobe",
1167            "pronucleus",
1168            "countersconce",
1169            "puli",
1170            "acupunctuate",
1171            "heterolysis",
1172            "unwattled",
1173            "bismerpund",
1174        ];
1175
1176        let rng = &mut testing_rng();
1177        let mut left = (0..1000)
1178            .map(|_| WORDS.choose(rng).unwrap().to_string() + "\n")
1179            .collect::<String>();
1180        left += "directory-signature foo bar\n";
1181        let mut right = (0..1015)
1182            .map(|_| WORDS.choose(rng).unwrap().to_string() + "\n")
1183            .collect::<String>();
1184        right += "directory-signature foo baz\n";
1185
1186        let diff = gen_cons_diff(&left, &right).unwrap();
1187        let check = apply_diff(&left, &diff, None).unwrap().to_string();
1188        assert_eq!(right, check);
1189    }
1190
1191    #[test]
1192    fn dot_line() {
1193        let base = "";
1194        let target = "foo\nbar\n.\nbaz\nfoo\n";
1195        assert_eq!(
1196            gen_ed_diff(base, target).unwrap_err(),
1197            GenEdDiffError::ContainsDotLine { lno: 3 },
1198        );
1199
1200        // Also check for dot lines with trailing spaces.
1201        let target = "foo\nbar\n.   \t \nbaz\nfoo\n";
1202        assert_eq!(
1203            gen_ed_diff(base, target).unwrap_err(),
1204            GenEdDiffError::ContainsDotLine { lno: 3 },
1205        );
1206
1207        // A line starting with a dot and not ending in WS shall be fine though.
1208        let target = "foo\nbar\n.   foo\nbaz\nfoo\n";
1209        let _ = gen_ed_diff(base, target).unwrap();
1210
1211        // Use gen_cons_diff here to assume that it is actually applied.
1212        let base = "directory-signature foo baz\n";
1213        let target = ".foo bar\n. bar\ndirectory-signature foo baz\n";
1214        assert_eq!(
1215            gen_cons_diff(base, target).unwrap(),
1216            "network-status-diff-version 1\n\
1217            hash D8138DC27D9A66F5760058A6BCB71B755462B9D26B811828F124D036DE329A58 \
1218            506AC3A4407BC5305DD0D08FED3F09C2FE69847541F642A8FD13D3BD06FFE432\n\
1219            1,$d\n\
1220            0a\n\
1221            .foo bar\n\
1222            . bar\n\
1223            directory-signature foo baz\n\
1224            .\n"
1225        );
1226    }
1227
1228    #[test]
1229    fn missing_newline() {
1230        let base = "";
1231        let target = "foo\nbar\nbaz";
1232        assert_eq!(
1233            gen_ed_diff(base, target).unwrap_err(),
1234            GenEdDiffError::MissingUnixLineEnding { lno: 3 }
1235        );
1236    }
1237
1238    #[test]
1239    fn mixed_with_crlf() {
1240        let base = "";
1241        let target = "foo\r\nbar\r\nbaz\nhello\r\n";
1242        assert_eq!(
1243            gen_ed_diff(base, target).unwrap_err(),
1244            GenEdDiffError::MissingUnixLineEnding { lno: 1 }
1245        );
1246    }
1247}
tor_consdiff/lib.rs

tor_consdiff/
lib.rs