Skip to main content

tor_consdiff/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3// @@ begin lint list maintained by maint/add_warning @@
4#![allow(renamed_and_removed_lints)] // @@REMOVE_WHEN(ci_arti_stable)
5#![allow(unknown_lints)] // @@REMOVE_WHEN(ci_arti_nightly)
6#![warn(missing_docs)]
7#![warn(noop_method_call)]
8#![warn(unreachable_pub)]
9#![warn(clippy::all)]
10#![deny(clippy::await_holding_lock)]
11#![deny(clippy::cargo_common_metadata)]
12#![deny(clippy::cast_lossless)]
13#![deny(clippy::checked_conversions)]
14#![warn(clippy::cognitive_complexity)]
15#![deny(clippy::debug_assert_with_mut_call)]
16#![deny(clippy::exhaustive_enums)]
17#![deny(clippy::exhaustive_structs)]
18#![deny(clippy::expl_impl_clone_on_copy)]
19#![deny(clippy::fallible_impl_from)]
20#![deny(clippy::implicit_clone)]
21#![deny(clippy::large_stack_arrays)]
22#![warn(clippy::manual_ok_or)]
23#![deny(clippy::missing_docs_in_private_items)]
24#![warn(clippy::needless_borrow)]
25#![warn(clippy::needless_pass_by_value)]
26#![warn(clippy::option_option)]
27#![deny(clippy::print_stderr)]
28#![deny(clippy::print_stdout)]
29#![warn(clippy::rc_buffer)]
30#![deny(clippy::ref_option_ref)]
31#![warn(clippy::semicolon_if_nothing_returned)]
32#![warn(clippy::trait_duplication_in_bounds)]
33#![deny(clippy::unchecked_time_subtraction)]
34#![deny(clippy::unnecessary_wraps)]
35#![warn(clippy::unseparated_literal_suffix)]
36#![deny(clippy::unwrap_used)]
37#![deny(clippy::mod_module_files)]
38#![allow(clippy::let_unit_value)] // This can reasonably be done for explicitness
39#![allow(clippy::uninlined_format_args)]
40#![allow(clippy::significant_drop_in_scrutinee)] // arti/-/merge_requests/588/#note_2812945
41#![allow(clippy::result_large_err)] // temporary workaround for arti#587
42#![allow(clippy::needless_raw_string_hashes)] // complained-about code is fine, often best
43#![allow(clippy::needless_lifetimes)] // See arti#1765
44#![allow(mismatched_lifetime_syntaxes)] // temporary workaround for arti#2060
45#![allow(clippy::collapsible_if)] // See arti#2342
46#![deny(clippy::unused_async)]
47#![deny(clippy::string_slice)] // See arti#2571
48//! <!-- @@ end lint list maintained by maint/add_warning @@ -->
49
50use std::fmt::{Display, Formatter, Write};
51use std::num::NonZeroUsize;
52use std::str::FromStr;
53
54mod err;
55use digest::Digest;
56pub use err::Error;
57use imara_diff::{Algorithm, Diff, Hunk, InternedInput};
58use tor_error::internal;
59use tor_netdoc::parse2::{ErrorProblem, ItemStream, KeywordRef, ParseError, ParseInput};
60
61use crate::err::GenEdDiffError;
62
63/// Result type used by this crate
64type Result<T> = std::result::Result<T, Error>;
65
66/// The keyword that identifies a directory signature line.
67// TODO: We probably want this in tor-netdoc.
68const DIRECTORY_SIGNATURE_KEYWORD: KeywordRef = KeywordRef::new_const("directory-signature");
69
70/// When hashing the signed part of the consensus, append this tail to the end.
71const CONSENSUS_SIGNED_SHA3_256_HASH_TAIL: &str = "directory-signature ";
72
73// Do not compile if we cannot safely convert a u32 into a usize.
74static_assertions::const_assert!(std::mem::size_of::<usize>() >= std::mem::size_of::<u32>());
75
76/// Generates a consensus diff.
77///
78/// This implementation is different from the one in CTor, because it uses a
79/// different algorithm, namely [`Algorithm::Myers`] from the [`imara_diff`]
80/// crate, which is more efficient than CTor in terms of runtime and about as
81/// equally efficient as CTor in output size.
82///
83/// The CTor implementation makes heavy use of the fact that the input is a
84/// valid consensus and that the routers in it are ordered.  This allows for
85/// some divide-and-conquer mechanisms and the cost of requiring more parsing.
86///
87/// Here, we only minimally parse the consensus, in order to only obtain the
88/// first `directory-signature` item and to cut everything including itself off
89/// from the input, as demanded by the specification.
90///
91/// All outputs of this function are guaranteed to work with this
92/// [`apply_diff()`] implementation as a check is performed before returning,
93/// because returning an unusable diff would be terrible.
94pub fn gen_cons_diff(base: &str, target: &str) -> Result<String> {
95    // Throw away the signatures.
96    let (base_signed, _) = split_directory_signatures(base)?;
97    let base_lines = base_signed.chars().filter(|c| *c == '\n').count() + 1;
98
99    // Compute the hashes for the header.
100    let base_signed_hash = hex::encode_upper({
101        let mut h = tor_llcrypto::d::Sha3_256::new();
102        h.update(base_signed);
103        h.update(CONSENSUS_SIGNED_SHA3_256_HASH_TAIL);
104        h.finalize()
105    });
106    let target_hash = hex::encode_upper(tor_llcrypto::d::Sha3_256::digest(target.as_bytes()));
107
108    // Compose the result with header.
109    let ed_diff = gen_ed_diff(base_signed, target).map_err(|e| match e {
110        GenEdDiffError::MissingUnixLineEnding { lno } => Error::InvalidInput(ParseError::new(
111            ErrorProblem::OtherBadDocument("line does not end with '\\n'"),
112            "consdiff",
113            "",
114            lno,
115            None,
116        )),
117        GenEdDiffError::ContainsDotLine { lno } => Error::InvalidInput(ParseError::new(
118            ErrorProblem::OtherBadDocument("contains dotline"),
119            "consdiff",
120            "",
121            lno,
122            None,
123        )),
124        GenEdDiffError::Write(_) => internal!("string write was not infallible?").into(),
125    })?;
126
127    let result = format!(
128        "network-status-diff-version 1\n\
129        hash {base_signed_hash} {target_hash}\n\
130        {base_lines},$d\n\
131        {ed_diff}"
132    );
133
134    // Ensure it is valid, refuse to emit an invalid diff.
135    let check = apply_diff(base, &result, None).map_err(|_| internal!("apply call failed"))?;
136    if check.to_string() != target {
137        Err(internal!("result does not match?"))?;
138    }
139
140    Ok(result)
141}
142
143/// Splits `input` at the first `directory-signature`.
144fn split_directory_signatures(input: &str) -> Result<(&str, &str)> {
145    let parse_input = ParseInput::new(input, "");
146    let mut items = ItemStream::new(&parse_input)?;
147
148    // Parse the consensus item by item until the first `directory-signature`.
149    loop {
150        // We only peek in order to get the proper byte offset.
151        // This is required because doing next() and breaking in the case of
152        // a `directory-signature` would then lead to `.byte_offset()` yielding
153        // the start of the second signature and not the start of the first one.
154        let item = items
155            .peek_keyword()
156            .map_err(|e| ParseError::new(e, "consdiff", "", items.lno_for_error(), None))?;
157
158        match item {
159            Some(DIRECTORY_SIGNATURE_KEYWORD) => {
160                let offset = items.byte_position();
161                return Ok(input
162                    .split_at_checked(offset)
163                    .ok_or_else(|| internal!("Calculated an invalid offset"))?);
164            }
165            Some(_) => {
166                // Consume the just peeked item.
167                let _ = items.next();
168            }
169            None => {
170                // We are finished.
171                return Err(Error::InvalidInput(ParseError::new(
172                    ErrorProblem::MissingItem {
173                        keyword: DIRECTORY_SIGNATURE_KEYWORD.as_str(),
174                    },
175                    "consdiff",
176                    "",
177                    items.lno_for_error(),
178                    None,
179                )));
180            }
181        }
182    }
183}
184
185/// Generates an input agnostic ed diff.
186///
187/// This function does the general logic of [`gen_cons_diff()`] but works in a
188/// document agnostic fashion.
189fn gen_ed_diff(base: &str, target: &str) -> std::result::Result<String, GenEdDiffError> {
190    let mut result = String::new();
191
192    // We use Myers' algorithm as benchmarks have shown that it provides an
193    // equal diff size as the ctor one while keeping an acceptable performance.
194    let input = InternedInput::new(base, target);
195    let mut diff = Diff::compute(Algorithm::Myers, &input);
196    diff.postprocess_lines(&input);
197
198    // Iterate through every a hunk, with a hunk being a block of changes.
199    let hunks = diff.hunks().collect::<Vec<_>>();
200    for hunk in hunks.into_iter().rev() {
201        // Format the header.
202        let hunk_type = HunkType::determine(&hunk);
203        match hunk_type {
204            // No need to do +1 because append is AFTER.
205            HunkType::Append => writeln!(result, "{}{hunk_type}", hunk.before.start)?,
206            HunkType::Delete | HunkType::Change => {
207                if hunk.before.start + 1 == hunk.before.end {
208                    // +1 because 1-indexed.
209                    writeln!(result, "{}{hunk_type}", hunk.before.start + 1)?;
210                } else {
211                    // +1 because 1-indexed; no need to do +1 on end because
212                    // the range is inclusive.
213                    writeln!(
214                        result,
215                        "{},{}{hunk_type}",
216                        hunk.before.start + 1,
217                        hunk.before.end
218                    )?;
219                }
220            }
221        }
222
223        // Format the body.
224        match hunk_type {
225            HunkType::Append | HunkType::Change => {
226                let range = (hunk.after.start)..(hunk.after.end);
227                let tlines = range
228                    .map(|idx| {
229                        let idx = usize::try_from(idx).expect("32-bit static assertion violated?");
230                        input.interner[input.after[idx]]
231                    })
232                    .collect::<Vec<_>>();
233
234                for (lno, line) in tlines.iter().copied().enumerate() {
235                    // Check that all lines end with a Unix line ending.
236                    if line.ends_with("\r\n") || !line.ends_with("\n") {
237                        // +1 because 1-indexed.
238                        return Err(GenEdDiffError::MissingUnixLineEnding { lno: lno + 1 });
239                    }
240
241                    // Check for lines consisting of a single dot plus trailing
242                    // whitespace characters.  No need to bother about "\r\n",
243                    // because we checked that one above.  Although technically
244                    // lines such as `. \n` are possible and understood
245                    // as part of ed diffs, they are not legal in tor netdocs, and
246                    // we want to be more defensive here for now; if it becomes a
247                    // problem, we may remove it later.
248                    if line.trim_end() == "." {
249                        // +1 because 1-indexed.
250                        return Err(GenEdDiffError::ContainsDotLine { lno: lno + 1 });
251                    }
252
253                    // All lines are newline terminated, no need to use writeln!
254                    write!(result, "{line}")?;
255                }
256
257                // Write the terminating dot.
258                writeln!(result, ".")?;
259            }
260            HunkType::Delete => {}
261        }
262    }
263
264    Ok(result)
265}
266
267/// The operational type of the hunk.
268#[derive(Clone, Copy, Debug, derive_more::Display)]
269enum HunkType {
270    /// This is a pure appending.
271    #[display("a")]
272    Append,
273    /// This is a pure deletion.
274    #[display("d")]
275    Delete,
276    /// This is change with potential additions and deletions.
277    #[display("c")]
278    Change,
279}
280
281impl HunkType {
282    /// Determines the type of the hunk.
283    fn determine(hunk: &Hunk) -> Self {
284        if hunk.is_pure_insertion() {
285            Self::Append
286        } else if hunk.is_pure_removal() {
287            Self::Delete
288        } else {
289            Self::Change
290        }
291    }
292}
293
294/// Return true if `s` looks more like a consensus diff than some other kind
295/// of document.
296pub fn looks_like_diff(s: &str) -> bool {
297    s.starts_with("network-status-diff-version")
298}
299
300/// Apply a given diff to an input text, and return the result from applying
301/// that diff.
302///
303/// This is a slow version, for testing and correctness checking.  It uses
304/// an O(n) operation to apply diffs, and therefore runs in O(n^2) time.
305#[cfg(any(test, feature = "slow-diff-apply"))]
306pub fn apply_diff_trivial<'a>(input: &'a str, diff: &'a str) -> Result<DiffResult<'a>> {
307    let mut diff_lines = diff.lines();
308    let (_, d2) = parse_diff_header(&mut diff_lines)?;
309
310    let mut diffable = DiffResult::from_str(input, d2);
311
312    for command in DiffCommandIter::new(diff_lines) {
313        command?.apply_to(&mut diffable)?;
314    }
315
316    Ok(diffable)
317}
318
319/// Apply a given diff to an input text, and return the result from applying
320/// that diff.
321///
322/// If `check_digest_in` is provided, require the diff to say that it
323/// applies to a document with the provided digest.
324pub fn apply_diff<'a>(
325    input: &'a str,
326    diff: &'a str,
327    check_digest_in: Option<[u8; 32]>,
328) -> Result<DiffResult<'a>> {
329    let mut input = DiffResult::from_str(input, [0; 32]);
330
331    let mut diff_lines = diff.lines();
332    let (d1, d2) = parse_diff_header(&mut diff_lines)?;
333    if let Some(d_want) = check_digest_in {
334        if d1 != d_want {
335            return Err(Error::CantApply("listed digest does not match document"));
336        }
337    }
338
339    let mut output = DiffResult::new(d2);
340
341    for command in DiffCommandIter::new(diff_lines) {
342        command?.apply_transformation(&mut input, &mut output)?;
343    }
344
345    output.push_reversed(&input.lines[..]);
346
347    output.lines.reverse();
348    Ok(output)
349}
350
351/// Given a line iterator, check to make sure the first two lines are
352/// a valid diff header as specified in dir-spec.txt.
353fn parse_diff_header<'a, I>(iter: &mut I) -> Result<([u8; 32], [u8; 32])>
354where
355    I: Iterator<Item = &'a str>,
356{
357    let line1 = iter.next();
358    if line1 != Some("network-status-diff-version 1") {
359        return Err(Error::BadDiff("unrecognized or missing header"));
360    }
361    let line2 = iter.next().ok_or(Error::BadDiff("header truncated"))?;
362    if !line2.starts_with("hash ") {
363        return Err(Error::BadDiff("missing 'hash' line"));
364    }
365    let elts: Vec<_> = line2.split_ascii_whitespace().collect();
366    if elts.len() != 3 {
367        return Err(Error::BadDiff("invalid 'hash' line"));
368    }
369    let d1 = hex::decode(elts[1])?;
370    let d2 = hex::decode(elts[2])?;
371    match (d1.try_into(), d2.try_into()) {
372        (Ok(a), Ok(b)) => Ok((a, b)),
373        _ => Err(Error::BadDiff("wrong digest lengths on 'hash' line")),
374    }
375}
376
377/// A command that can appear in a diff.  Each command tells us to
378/// remove zero or more lines, and insert zero or more lines in their
379/// place.
380///
381/// Commands refer to lines by 1-indexed line number.
382#[derive(Clone, Debug)]
383enum DiffCommand<'a> {
384    /// Remove the lines from low through high, inclusive.
385    Delete {
386        /// The first line to remove
387        low: usize,
388        /// The last line to remove
389        high: usize,
390    },
391    /// Remove the lines from low through the end of the file, inclusive.
392    DeleteToEnd {
393        /// The first line to remove
394        low: usize,
395    },
396    /// Replace the lines from low through high, inclusive, with the
397    /// lines in 'lines'.
398    Replace {
399        /// The first line to replace
400        low: usize,
401        /// The last line to replace
402        high: usize,
403        /// The text to insert instead
404        lines: Vec<&'a str>,
405    },
406    /// Insert the provided 'lines' after the line with index 'pos'.
407    Insert {
408        /// The position after which to insert the text
409        pos: usize,
410        /// The text to insert
411        lines: Vec<&'a str>,
412    },
413}
414
415/// The result of applying one or more diff commands to an input string.
416///
417/// It refers to lines from the diff and the input by reference, to
418/// avoid copying.
419#[derive(Clone, Debug)]
420pub struct DiffResult<'a> {
421    /// An expected digest of the output, after it has been assembled.
422    d_post: [u8; 32],
423    /// The lines in the output.
424    lines: Vec<&'a str>,
425}
426
427/// A possible value for the end of a range.  It can be either a line number,
428/// or a dollar sign indicating "end of file".
429#[derive(Clone, Copy, Debug)]
430enum RangeEnd {
431    /// A line number in the file.
432    Num(NonZeroUsize),
433    /// A dollar sign, indicating "end of file" in a delete command.
434    DollarSign,
435}
436
437impl FromStr for RangeEnd {
438    type Err = Error;
439    fn from_str(s: &str) -> Result<RangeEnd> {
440        if s == "$" {
441            Ok(RangeEnd::DollarSign)
442        } else {
443            let v: NonZeroUsize = s.parse()?;
444            if v.get() == usize::MAX {
445                return Err(Error::BadDiff("range cannot end at usize::MAX"));
446            }
447            Ok(RangeEnd::Num(v))
448        }
449    }
450}
451
452impl<'a> DiffCommand<'a> {
453    /// Transform 'target' according to the this command.
454    ///
455    /// Because DiffResult internally uses a vector of line, this
456    /// implementation is potentially O(n) in the size of the input.
457    #[cfg(any(test, feature = "slow-diff-apply"))]
458    fn apply_to(&self, target: &mut DiffResult<'a>) -> Result<()> {
459        match self {
460            Self::Delete { low, high } => {
461                target.remove_lines(*low, *high)?;
462            }
463            Self::DeleteToEnd { low } => {
464                target.remove_lines(*low, target.lines.len())?;
465            }
466            Self::Replace { low, high, lines } => {
467                target.remove_lines(*low, *high)?;
468                target.insert_at(*low, lines)?;
469            }
470            Self::Insert { pos, lines } => {
471                // This '+1' seems off, but it's what the spec says. I wonder
472                // if the spec is wrong.
473                target.insert_at(*pos + 1, lines)?;
474            }
475        };
476        Ok(())
477    }
478
479    /// Apply this command to 'input', moving lines into 'output'.
480    ///
481    /// This is a more efficient algorithm, but it requires that the
482    /// diff commands are sorted in reverse order by line
483    /// number. (Fortunately, the Tor ed diff format guarantees this.)
484    ///
485    /// Before calling this method, input and output must contain the
486    /// results of having applied the previous command in the diff.
487    /// (When no commands have been applied, input starts out as the
488    /// original text, and output starts out empty.)
489    ///
490    /// This method applies the command by copying unaffected lines
491    /// from the _end_ of input into output, adding any lines inserted
492    /// by this command, and finally deleting any affected lines from
493    /// input.
494    ///
495    /// We build the `output` value in reverse order, and then put it
496    /// back to normal before giving it to the user.
497    fn apply_transformation(
498        &self,
499        input: &mut DiffResult<'a>,
500        output: &mut DiffResult<'a>,
501    ) -> Result<()> {
502        if let Some(succ) = self.following_lines() {
503            if let Some(subslice) = input.lines.get(succ - 1..) {
504                // Lines from `succ` onwards are unaffected.  Copy them.
505                output.push_reversed(subslice);
506            } else {
507                // Oops, dubious line number.
508                return Err(Error::CantApply(
509                    "ending line number didn't correspond to document",
510                ));
511            }
512        }
513
514        if let Some(lines) = self.lines() {
515            // These are the lines we're inserting.
516            output.push_reversed(lines);
517        }
518
519        let remove = self.first_removed_line();
520        if remove == 0 || (!self.is_insert() && remove > input.lines.len()) {
521            return Err(Error::CantApply(
522                "starting line number didn't correspond to document",
523            ));
524        }
525        input.lines.truncate(remove - 1);
526
527        Ok(())
528    }
529
530    /// Return the lines that we should add to the output
531    fn lines(&self) -> Option<&[&'a str]> {
532        match self {
533            Self::Replace { lines, .. } | Self::Insert { lines, .. } => Some(lines.as_slice()),
534            _ => None,
535        }
536    }
537
538    /// Return a mutable reference to the vector of lines we should
539    /// add to the output.
540    fn linebuf_mut(&mut self) -> Option<&mut Vec<&'a str>> {
541        match self {
542            Self::Replace { lines, .. } | Self::Insert { lines, .. } => Some(lines),
543            _ => None,
544        }
545    }
546
547    /// Return the (1-indexed) line number of the first line in the
548    /// input that comes _after_ this command, and is not affected by it.
549    ///
550    /// We use this line number to know which lines we should copy.
551    fn following_lines(&self) -> Option<usize> {
552        match self {
553            Self::Delete { high, .. } | Self::Replace { high, .. } => Some(high + 1),
554            Self::DeleteToEnd { .. } => None,
555            Self::Insert { pos, .. } => Some(pos + 1),
556        }
557    }
558
559    /// Return the (1-indexed) line number of the first line that we
560    /// should clear from the input when processing this command.
561    ///
562    /// This can be the same as following_lines(), if we shouldn't
563    /// actually remove any lines.
564    fn first_removed_line(&self) -> usize {
565        match self {
566            Self::Delete { low, .. } => *low,
567            Self::DeleteToEnd { low } => *low,
568            Self::Replace { low, .. } => *low,
569            Self::Insert { pos, .. } => *pos + 1,
570        }
571    }
572
573    /// Return true if this is an Insert command.
574    fn is_insert(&self) -> bool {
575        matches!(self, Self::Insert { .. })
576    }
577
578    /// Extract a single command from a line iterator that yields lines
579    /// of the diffs.  Return None if we're at the end of the iterator.
580    fn from_line_iterator<I>(iter: &mut I) -> Result<Option<Self>>
581    where
582        I: Iterator<Item = &'a str>,
583    {
584        let command = match iter.next() {
585            Some(s) => s,
586            None => return Ok(None),
587        };
588
589        // `command` can be of these forms: `Rc`, `Rd`, `N,$d`, and `Na`,
590        // where R is a range of form `N,N`, and where N is a line number.
591
592        if command.len() < 2 || !command.is_ascii() {
593            return Err(Error::BadDiff("command too short"));
594        }
595
596        let (range, command) = command.split_at(command.len() - 1);
597        let (low, high) = if let Some((lo, hi)) = range.split_once(',') {
598            (lo.parse::<usize>()?, Some(hi.parse::<RangeEnd>()?))
599        } else {
600            (range.parse::<usize>()?, None)
601        };
602
603        if low == usize::MAX {
604            return Err(Error::BadDiff("range cannot begin at usize::MAX"));
605        }
606
607        match (low, high) {
608            (lo, Some(RangeEnd::Num(hi))) if lo > hi.into() => {
609                return Err(Error::BadDiff("mis-ordered lines in range"));
610            }
611            (_, _) => (),
612        }
613
614        let mut cmd = match (command, low, high) {
615            ("d", low, None) => Self::Delete { low, high: low },
616            ("d", low, Some(RangeEnd::Num(high))) => Self::Delete {
617                low,
618                high: high.into(),
619            },
620            ("d", low, Some(RangeEnd::DollarSign)) => Self::DeleteToEnd { low },
621            ("c", low, None) => Self::Replace {
622                low,
623                high: low,
624                lines: Vec::new(),
625            },
626            ("c", low, Some(RangeEnd::Num(high))) => Self::Replace {
627                low,
628                high: high.into(),
629                lines: Vec::new(),
630            },
631            ("a", low, None) => Self::Insert {
632                pos: low,
633                lines: Vec::new(),
634            },
635            (_, _, _) => return Err(Error::BadDiff("can't parse command line")),
636        };
637
638        if let Some(ref mut linebuf) = cmd.linebuf_mut() {
639            // The 'c' and 'a' commands take a series of lines followed by a
640            // line containing a period.
641            loop {
642                match iter.next() {
643                    None => return Err(Error::BadDiff("unterminated block to insert")),
644                    Some(".") => break,
645                    Some(line) => linebuf.push(line),
646                }
647            }
648        }
649
650        Ok(Some(cmd))
651    }
652}
653
654/// Iterator that wraps a line iterator and returns a sequence of
655/// `Result<DiffCommand>`.
656///
657/// This iterator forces the commands to affect the file in reverse order,
658/// so that we can use the O(n) algorithm for applying these diffs.
659struct DiffCommandIter<'a, I>
660where
661    I: Iterator<Item = &'a str>,
662{
663    /// The underlying iterator.
664    iter: I,
665
666    /// The 'first removed line' of the last-parsed command; used to ensure
667    /// that commands appear in reverse order.
668    last_cmd_first_removed: Option<usize>,
669}
670
671impl<'a, I> DiffCommandIter<'a, I>
672where
673    I: Iterator<Item = &'a str>,
674{
675    /// Construct a new DiffCommandIter wrapping `iter`.
676    fn new(iter: I) -> Self {
677        DiffCommandIter {
678            iter,
679            last_cmd_first_removed: None,
680        }
681    }
682}
683
684impl<'a, I> Iterator for DiffCommandIter<'a, I>
685where
686    I: Iterator<Item = &'a str>,
687{
688    type Item = Result<DiffCommand<'a>>;
689    fn next(&mut self) -> Option<Result<DiffCommand<'a>>> {
690        match DiffCommand::from_line_iterator(&mut self.iter) {
691            Err(e) => Some(Err(e)),
692            Ok(None) => None,
693            Ok(Some(c)) => match (self.last_cmd_first_removed, c.following_lines()) {
694                (Some(_), None) => Some(Err(Error::BadDiff("misordered commands"))),
695                (Some(a), Some(b)) if a < b => Some(Err(Error::BadDiff("misordered commands"))),
696                (_, _) => {
697                    self.last_cmd_first_removed = Some(c.first_removed_line());
698                    Some(Ok(c))
699                }
700            },
701        }
702    }
703}
704
705impl<'a> DiffResult<'a> {
706    /// Construct a new DiffResult containing the provided string
707    /// split into lines, and an expected post-transformation digest.
708    fn from_str(s: &'a str, d_post: [u8; 32]) -> Self {
709        // As per the [netdoc syntax], newlines should be discarded and ignored.
710        //
711        // [netdoc syntax]: https://spec.torproject.org/dir-spec/netdoc.html#netdoc-syntax
712        let lines: Vec<_> = s.lines().collect();
713
714        DiffResult { d_post, lines }
715    }
716
717    /// Return a new empty DiffResult with an expected
718    /// post-transformation digests
719    fn new(d_post: [u8; 32]) -> Self {
720        DiffResult {
721            d_post,
722            lines: Vec::new(),
723        }
724    }
725
726    /// Put every member of `lines` at the end of this DiffResult, in
727    /// reverse order.
728    fn push_reversed(&mut self, lines: &[&'a str]) {
729        self.lines.extend(lines.iter().rev());
730    }
731
732    /// Remove the 1-indexed lines from `first` through `last` inclusive.
733    ///
734    /// This has to move elements around within the vector, and so it
735    /// is potentially O(n) in its length.
736    #[cfg(any(test, feature = "slow-diff-apply"))]
737    fn remove_lines(&mut self, first: usize, last: usize) -> Result<()> {
738        if first > self.lines.len() || last > self.lines.len() || first == 0 || last == 0 {
739            Err(Error::CantApply("line out of range"))
740        } else {
741            let n_to_remove = last - first + 1;
742            if last != self.lines.len() {
743                self.lines[..].copy_within((last).., first - 1);
744            }
745            self.lines.truncate(self.lines.len() - n_to_remove);
746            Ok(())
747        }
748    }
749
750    /// Insert the provided `lines` so that they appear at 1-indexed
751    /// position `pos`.
752    ///
753    /// This has to move elements around within the vector, and so it
754    /// is potentially O(n) in its length.
755    #[cfg(any(test, feature = "slow-diff-apply"))]
756    fn insert_at(&mut self, pos: usize, lines: &[&'a str]) -> Result<()> {
757        if pos > self.lines.len() + 1 || pos == 0 {
758            Err(Error::CantApply("position out of range"))
759        } else {
760            let orig_len = self.lines.len();
761            self.lines.resize(self.lines.len() + lines.len(), "");
762            self.lines
763                .copy_within(pos - 1..orig_len, pos - 1 + lines.len());
764            self.lines[(pos - 1)..(pos + lines.len() - 1)].copy_from_slice(lines);
765            Ok(())
766        }
767    }
768
769    /// See whether the output of this diff matches the target digest.
770    ///
771    /// If not, return an error.
772    pub fn check_digest(&self) -> Result<()> {
773        use digest::Digest;
774        use tor_llcrypto::d::Sha3_256;
775        let mut d = Sha3_256::new();
776        for line in &self.lines {
777            d.update(line.as_bytes());
778            d.update(b"\n");
779        }
780        if d.finalize() == self.d_post.into() {
781            Ok(())
782        } else {
783            Err(Error::CantApply("Wrong digest after applying diff"))
784        }
785    }
786}
787
788impl<'a> Display for DiffResult<'a> {
789    fn fmt(&self, f: &mut Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
790        for elt in &self.lines {
791            writeln!(f, "{}", elt)?;
792        }
793        Ok(())
794    }
795}
796
797#[cfg(test)]
798mod test {
799    // @@ begin test lint list maintained by maint/add_warning @@
800    #![allow(clippy::bool_assert_comparison)]
801    #![allow(clippy::clone_on_copy)]
802    #![allow(clippy::dbg_macro)]
803    #![allow(clippy::mixed_attributes_style)]
804    #![allow(clippy::print_stderr)]
805    #![allow(clippy::print_stdout)]
806    #![allow(clippy::single_char_pattern)]
807    #![allow(clippy::unwrap_used)]
808    #![allow(clippy::unchecked_time_subtraction)]
809    #![allow(clippy::useless_vec)]
810    #![allow(clippy::needless_pass_by_value)]
811    #![allow(clippy::string_slice)] // See arti#2571
812    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
813
814    use rand::seq::IndexedRandom;
815    use tor_basic_utils::test_rng::testing_rng;
816
817    use super::*;
818
819    #[test]
820    fn remove() -> Result<()> {
821        let example = DiffResult::from_str("1\n2\n3\n4\n5\n6\n7\n8\n9\n", [0; 32]);
822
823        let mut d = example.clone();
824        d.remove_lines(5, 7)?;
825        assert_eq!(d.to_string(), "1\n2\n3\n4\n8\n9\n");
826
827        let mut d = example.clone();
828        d.remove_lines(1, 9)?;
829        assert_eq!(d.to_string(), "");
830
831        let mut d = example.clone();
832        d.remove_lines(1, 1)?;
833        assert_eq!(d.to_string(), "2\n3\n4\n5\n6\n7\n8\n9\n");
834
835        let mut d = example.clone();
836        d.remove_lines(6, 9)?;
837        assert_eq!(d.to_string(), "1\n2\n3\n4\n5\n");
838
839        let mut d = example.clone();
840        assert!(d.remove_lines(6, 10).is_err());
841        assert!(d.remove_lines(0, 1).is_err());
842        assert_eq!(d.to_string(), "1\n2\n3\n4\n5\n6\n7\n8\n9\n");
843
844        Ok(())
845    }
846
847    #[test]
848    fn insert() -> Result<()> {
849        let example = DiffResult::from_str("1\n2\n3\n4\n5\n", [0; 32]);
850        let mut d = example.clone();
851        d.insert_at(3, &["hello", "world"])?;
852        assert_eq!(d.to_string(), "1\n2\nhello\nworld\n3\n4\n5\n");
853
854        let mut d = example.clone();
855        d.insert_at(6, &["hello", "world"])?;
856        assert_eq!(d.to_string(), "1\n2\n3\n4\n5\nhello\nworld\n");
857
858        let mut d = example.clone();
859        assert!(d.insert_at(0, &["hello", "world"]).is_err());
860        assert!(d.insert_at(7, &["hello", "world"]).is_err());
861        Ok(())
862    }
863
864    #[test]
865    fn push_reversed() {
866        let mut d = DiffResult::new([0; 32]);
867        d.push_reversed(&["7", "8", "9"]);
868        assert_eq!(d.to_string(), "9\n8\n7\n");
869        d.push_reversed(&["world", "hello", ""]);
870        assert_eq!(d.to_string(), "9\n8\n7\n\nhello\nworld\n");
871    }
872
873    #[test]
874    fn apply_command_simple() {
875        let example = DiffResult::from_str("a\nb\nc\nd\ne\nf\n", [0; 32]);
876
877        let mut d = example.clone();
878        assert_eq!(d.to_string(), "a\nb\nc\nd\ne\nf\n".to_string());
879        assert!(DiffCommand::DeleteToEnd { low: 5 }.apply_to(&mut d).is_ok());
880        assert_eq!(d.to_string(), "a\nb\nc\nd\n".to_string());
881
882        let mut d = example.clone();
883        assert!(
884            DiffCommand::Delete { low: 3, high: 5 }
885                .apply_to(&mut d)
886                .is_ok()
887        );
888        assert_eq!(d.to_string(), "a\nb\nf\n".to_string());
889
890        let mut d = example.clone();
891        assert!(
892            DiffCommand::Replace {
893                low: 3,
894                high: 5,
895                lines: vec!["hello", "world"]
896            }
897            .apply_to(&mut d)
898            .is_ok()
899        );
900        assert_eq!(d.to_string(), "a\nb\nhello\nworld\nf\n".to_string());
901
902        let mut d = example.clone();
903        assert!(
904            DiffCommand::Insert {
905                pos: 3,
906                lines: vec!["hello", "world"]
907            }
908            .apply_to(&mut d)
909            .is_ok()
910        );
911        assert_eq!(
912            d.to_string(),
913            "a\nb\nc\nhello\nworld\nd\ne\nf\n".to_string()
914        );
915    }
916
917    #[test]
918    fn parse_command() -> Result<()> {
919        fn parse(s: &str) -> Result<DiffCommand<'_>> {
920            let mut iter = s.lines();
921            let cmd = DiffCommand::from_line_iterator(&mut iter)?;
922            let cmd2 = DiffCommand::from_line_iterator(&mut iter)?;
923            if cmd2.is_some() {
924                panic!("Unexpected second command");
925            }
926            Ok(cmd.unwrap())
927        }
928
929        fn parse_err(s: &str) {
930            let mut iter = s.lines();
931            let cmd = DiffCommand::from_line_iterator(&mut iter);
932            assert!(matches!(cmd, Err(Error::BadDiff(_))));
933        }
934
935        let p = parse("3,8d\n")?;
936        assert!(matches!(p, DiffCommand::Delete { low: 3, high: 8 }));
937        let p = parse("3d\n")?;
938        assert!(matches!(p, DiffCommand::Delete { low: 3, high: 3 }));
939        let p = parse("100,$d\n")?;
940        assert!(matches!(p, DiffCommand::DeleteToEnd { low: 100 }));
941
942        let p = parse("30,40c\nHello\nWorld\n.\n")?;
943        assert!(matches!(
944            p,
945            DiffCommand::Replace {
946                low: 30,
947                high: 40,
948                ..
949            }
950        ));
951        assert_eq!(p.lines(), Some(&["Hello", "World"][..]));
952        let p = parse("30c\nHello\nWorld\n.\n")?;
953        assert!(matches!(
954            p,
955            DiffCommand::Replace {
956                low: 30,
957                high: 30,
958                ..
959            }
960        ));
961        assert_eq!(p.lines(), Some(&["Hello", "World"][..]));
962
963        let p = parse("999a\nHello\nWorld\n.\n")?;
964        assert!(matches!(p, DiffCommand::Insert { pos: 999, .. }));
965        assert_eq!(p.lines(), Some(&["Hello", "World"][..]));
966        let p = parse("0a\nHello\nWorld\n.\n")?;
967        assert!(matches!(p, DiffCommand::Insert { pos: 0, .. }));
968        assert_eq!(p.lines(), Some(&["Hello", "World"][..]));
969
970        parse_err("hello world");
971        parse_err("\n\n");
972        parse_err("$,5d");
973        parse_err("5,6,8d");
974        parse_err("8,5d");
975        parse_err("6");
976        parse_err("d");
977        parse_err("-10d");
978        parse_err("4,$c\na\n.");
979        parse_err("foo");
980        parse_err("5,10p");
981        parse_err("18446744073709551615a");
982        parse_err("1,18446744073709551615d");
983
984        Ok(())
985    }
986
987    #[test]
988    fn apply_transformation() -> Result<()> {
989        let example = DiffResult::from_str("1\n2\n3\n4\n5\n6\n7\n8\n9\n", [0; 32]);
990        let empty = DiffResult::new([1; 32]);
991
992        let mut inp = example.clone();
993        let mut out = empty.clone();
994        DiffCommand::DeleteToEnd { low: 5 }.apply_transformation(&mut inp, &mut out)?;
995        assert_eq!(inp.to_string(), "1\n2\n3\n4\n");
996        assert_eq!(out.to_string(), "");
997
998        let mut inp = example.clone();
999        let mut out = empty.clone();
1000        DiffCommand::DeleteToEnd { low: 9 }.apply_transformation(&mut inp, &mut out)?;
1001        assert_eq!(inp.to_string(), "1\n2\n3\n4\n5\n6\n7\n8\n");
1002        assert_eq!(out.to_string(), "");
1003
1004        let mut inp = example.clone();
1005        let mut out = empty.clone();
1006        DiffCommand::Delete { low: 3, high: 5 }.apply_transformation(&mut inp, &mut out)?;
1007        assert_eq!(inp.to_string(), "1\n2\n");
1008        assert_eq!(out.to_string(), "9\n8\n7\n6\n");
1009
1010        let mut inp = example.clone();
1011        let mut out = empty.clone();
1012        DiffCommand::Replace {
1013            low: 5,
1014            high: 6,
1015            lines: vec!["oh hey", "there"],
1016        }
1017        .apply_transformation(&mut inp, &mut out)?;
1018        assert_eq!(inp.to_string(), "1\n2\n3\n4\n");
1019        assert_eq!(out.to_string(), "9\n8\n7\nthere\noh hey\n");
1020
1021        let mut inp = example.clone();
1022        let mut out = empty.clone();
1023        DiffCommand::Insert {
1024            pos: 3,
1025            lines: vec!["oh hey", "there"],
1026        }
1027        .apply_transformation(&mut inp, &mut out)?;
1028        assert_eq!(inp.to_string(), "1\n2\n3\n");
1029        assert_eq!(out.to_string(), "9\n8\n7\n6\n5\n4\nthere\noh hey\n");
1030        DiffCommand::Insert {
1031            pos: 0,
1032            lines: vec!["boom!"],
1033        }
1034        .apply_transformation(&mut inp, &mut out)?;
1035        assert_eq!(inp.to_string(), "");
1036        assert_eq!(
1037            out.to_string(),
1038            "9\n8\n7\n6\n5\n4\nthere\noh hey\n3\n2\n1\nboom!\n"
1039        );
1040
1041        let mut inp = example.clone();
1042        let mut out = empty.clone();
1043        let r = DiffCommand::Delete {
1044            low: 100,
1045            high: 200,
1046        }
1047        .apply_transformation(&mut inp, &mut out);
1048        assert!(r.is_err());
1049        let r = DiffCommand::Delete { low: 5, high: 200 }.apply_transformation(&mut inp, &mut out);
1050        assert!(r.is_err());
1051        let r = DiffCommand::Delete { low: 0, high: 1 }.apply_transformation(&mut inp, &mut out);
1052        assert!(r.is_err());
1053        let r = DiffCommand::DeleteToEnd { low: 10 }.apply_transformation(&mut inp, &mut out);
1054        assert!(r.is_err());
1055        Ok(())
1056    }
1057
1058    #[test]
1059    fn header() -> Result<()> {
1060        fn header_from(s: &str) -> Result<([u8; 32], [u8; 32])> {
1061            let mut iter = s.lines();
1062            parse_diff_header(&mut iter)
1063        }
1064
1065        let (a,b) = header_from(
1066            "network-status-diff-version 1
1067hash B03DA3ACA1D3C1D083E3FF97873002416EBD81A058B406D5C5946EAB53A79663 F6789F35B6B3BA58BB23D29E53A8ED6CBB995543DBE075DD5671481C4BA677FB"
1068        )?;
1069
1070        assert_eq!(
1071            &a[..],
1072            hex::decode("B03DA3ACA1D3C1D083E3FF97873002416EBD81A058B406D5C5946EAB53A79663")?
1073        );
1074        assert_eq!(
1075            &b[..],
1076            hex::decode("F6789F35B6B3BA58BB23D29E53A8ED6CBB995543DBE075DD5671481C4BA677FB")?
1077        );
1078
1079        assert!(header_from("network-status-diff-version 2\n").is_err());
1080        assert!(header_from("").is_err());
1081        assert!(header_from("5,$d\n1,2d\n").is_err());
1082        assert!(header_from("network-status-diff-version 1\n").is_err());
1083        assert!(
1084            header_from(
1085                "network-status-diff-version 1
1086hash x y
10875,5d"
1088            )
1089            .is_err()
1090        );
1091        assert!(
1092            header_from(
1093                "network-status-diff-version 1
1094hash x y
10955,5d"
1096            )
1097            .is_err()
1098        );
1099        assert!(
1100            header_from(
1101                "network-status-diff-version 1
1102hash AA BB
11035,5d"
1104            )
1105            .is_err()
1106        );
1107        assert!(
1108            header_from(
1109                "network-status-diff-version 1
1110oh hello there
11115,5d"
1112            )
1113            .is_err()
1114        );
1115        assert!(header_from("network-status-diff-version 1
1116hash B03DA3ACA1D3C1D083E3FF97873002416EBD81A058B406D5C5946EAB53A79663 F6789F35B6B3BA58BB23D29E53A8ED6CBB995543DBE075DD5671481C4BA677FB extra").is_err());
1117
1118        Ok(())
1119    }
1120
1121    #[test]
1122    fn apply_simple() {
1123        let pre = include_str!("../testdata/consensus1.txt");
1124        let diff = include_str!("../testdata/diff1.txt");
1125        let post = include_str!("../testdata/consensus2.txt");
1126
1127        let result = apply_diff_trivial(pre, diff).unwrap();
1128        assert!(result.check_digest().is_ok());
1129        assert_eq!(result.to_string(), post);
1130    }
1131
1132    #[test]
1133    fn sort_order() -> Result<()> {
1134        fn cmds(s: &str) -> Result<Vec<DiffCommand<'_>>> {
1135            let mut out = Vec::new();
1136            for cmd in DiffCommandIter::new(s.lines()) {
1137                out.push(cmd?);
1138            }
1139            Ok(out)
1140        }
1141
1142        let _ = cmds("6,9d\n5,5d\n")?;
1143        assert!(cmds("5,5d\n6,9d\n").is_err());
1144        assert!(cmds("5,5d\n6,6d\n").is_err());
1145        assert!(cmds("5,5d\n5,6d\n").is_err());
1146
1147        Ok(())
1148    }
1149
1150    /// Test for cons diff using a random word generator.
1151    #[test]
1152    fn cons_diff() {
1153        // cat /usr/share/dict/words | sort -R | head -n 20 | sed 's/^/"/g' | sed 's/$/",/g'
1154        const WORDS: &[&str] = &[
1155            "citole",
1156            "aflow",
1157            "plowfoot",
1158            "coom",
1159            "retape",
1160            "perish",
1161            "overstifle",
1162            "ramshackle",
1163            "Romeo",
1164            "alme",
1165            "expressivity",
1166            "Kieffer",
1167            "tobe",
1168            "pronucleus",
1169            "countersconce",
1170            "puli",
1171            "acupunctuate",
1172            "heterolysis",
1173            "unwattled",
1174            "bismerpund",
1175        ];
1176
1177        let rng = &mut testing_rng();
1178        let mut left = (0..1000)
1179            .map(|_| WORDS.choose(rng).unwrap().to_string() + "\n")
1180            .collect::<String>();
1181        left += "directory-signature foo bar\n";
1182        let mut right = (0..1015)
1183            .map(|_| WORDS.choose(rng).unwrap().to_string() + "\n")
1184            .collect::<String>();
1185        right += "directory-signature foo baz\n";
1186
1187        let diff = gen_cons_diff(&left, &right).unwrap();
1188        let check = apply_diff(&left, &diff, None).unwrap().to_string();
1189        assert_eq!(right, check);
1190    }
1191
1192    #[test]
1193    fn dot_line() {
1194        let base = "";
1195        let target = "foo\nbar\n.\nbaz\nfoo\n";
1196        assert_eq!(
1197            gen_ed_diff(base, target).unwrap_err(),
1198            GenEdDiffError::ContainsDotLine { lno: 3 },
1199        );
1200
1201        // Also check for dot lines with trailing spaces.
1202        let target = "foo\nbar\n.   \t \nbaz\nfoo\n";
1203        assert_eq!(
1204            gen_ed_diff(base, target).unwrap_err(),
1205            GenEdDiffError::ContainsDotLine { lno: 3 },
1206        );
1207
1208        // A line starting with a dot and not ending in WS shall be fine though.
1209        let target = "foo\nbar\n.   foo\nbaz\nfoo\n";
1210        let _ = gen_ed_diff(base, target).unwrap();
1211
1212        // Use gen_cons_diff here to assume that it is actually applied.
1213        let base = "directory-signature foo baz\n";
1214        let target = ".foo bar\n. bar\ndirectory-signature foo baz\n";
1215        assert_eq!(
1216            gen_cons_diff(base, target).unwrap(),
1217            "network-status-diff-version 1\n\
1218            hash D8138DC27D9A66F5760058A6BCB71B755462B9D26B811828F124D036DE329A58 \
1219            506AC3A4407BC5305DD0D08FED3F09C2FE69847541F642A8FD13D3BD06FFE432\n\
1220            1,$d\n\
1221            0a\n\
1222            .foo bar\n\
1223            . bar\n\
1224            directory-signature foo baz\n\
1225            .\n"
1226        );
1227    }
1228
1229    #[test]
1230    fn missing_newline() {
1231        let base = "";
1232        let target = "foo\nbar\nbaz";
1233        assert_eq!(
1234            gen_ed_diff(base, target).unwrap_err(),
1235            GenEdDiffError::MissingUnixLineEnding { lno: 3 }
1236        );
1237    }
1238
1239    #[test]
1240    fn mixed_with_crlf() {
1241        let base = "";
1242        let target = "foo\r\nbar\r\nbaz\nhello\r\n";
1243        assert_eq!(
1244            gen_ed_diff(base, target).unwrap_err(),
1245            GenEdDiffError::MissingUnixLineEnding { lno: 1 }
1246        );
1247    }
1248}