Skip to main content

formatparse_core/
input_line_continuations.rs

//! Backslash–newline continuations in **matched input** for `:ml` / `:blk` fields (GitHub issue #80).
//!
//! Modeled on format **pattern** continuations (issue #68): consider the run of `k` backslashes
//! immediately before a line break (`\n`, `\r\n`, or a lone `\r`). If `k` is odd, the line
//! continues: the last backslash and the line break are removed, and `floor((k-1)/2)` literal
//! backslashes are kept. If `k` is even (including zero), the line is left untouched: all `k`
//! backslashes and the line break are preserved. After a continuation, ASCII spaces and tabs at
//! the start of the next physical line are stripped.
8
/// Collapse backslash line continuations in captured multiline / indent-block text.
///
/// The input is walked one physical line at a time; a line ends at `\n`, `\r\n`, or a lone
/// `\r`. For every line that is followed by a line break, the run of `k` backslashes directly
/// before the break decides the outcome:
///
/// * `k` odd — continuation: the break and the final backslash are dropped, `(k - 1) / 2`
///   backslashes are kept, and leading ASCII spaces / tabs of the following line are skipped.
/// * `k` even (including zero) — the line and its break are copied through verbatim.
///
/// Text after the last line break is copied unchanged, trailing backslashes included.
pub fn normalize_input_line_continuations(input: &str) -> String {
    let bytes = input.as_bytes();
    let mut folded = String::with_capacity(input.len());
    let mut cursor = 0usize;

    while cursor < bytes.len() {
        // Find the next line break; if there is none, the remainder is copied as-is.
        let Some(offset) = bytes[cursor..]
            .iter()
            .position(|&c| matches!(c, b'\n' | b'\r'))
        else {
            folded.push_str(&input[cursor..]);
            break;
        };
        let line_end = cursor + offset;

        // A CR immediately followed by LF is a single two-byte break.
        let is_crlf = bytes[line_end] == b'\r' && bytes.get(line_end + 1) == Some(&b'\n');
        let next_line = line_end + if is_crlf { 2 } else { 1 };

        // Every slice boundary used below sits next to an ASCII byte (line break, backslash,
        // space or tab) or at an end of the input, so it is always a valid `char` boundary.
        let line = &input[cursor..line_end];
        let body = line.trim_end_matches('\\');
        let backslashes = line.len() - body.len();

        if backslashes % 2 == 0 {
            // No continuation: line and break pass through untouched.
            folded.push_str(&input[cursor..next_line]);
            cursor = next_line;
        } else {
            // Continuation: drop the break and the last backslash, halve the rest.
            folded.push_str(body);
            folded.extend(std::iter::repeat_n('\\', (backslashes - 1) / 2));
            let indent = bytes[next_line..]
                .iter()
                .take_while(|&&c| matches!(c, b' ' | b'\t'))
                .count();
            cursor = next_line + indent;
        }
    }

    folded
}
59
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn continuation_joins_lines() {
        assert_eq!(normalize_input_line_continuations("foo\\\nbar"), "foobar");
    }

    #[test]
    fn continuation_crlf() {
        assert_eq!(normalize_input_line_continuations("foo\\\r\nbar"), "foobar");
    }

    #[test]
    fn even_backslashes_keep_newline() {
        assert_eq!(
            normalize_input_line_continuations("foo\\\\\nbar"),
            "foo\\\\\nbar"
        );
    }

    /// An even run before a CRLF break is left untouched as well.
    #[test]
    fn even_backslashes_keep_crlf() {
        assert_eq!(
            normalize_input_line_continuations("foo\\\\\r\nbar"),
            "foo\\\\\r\nbar"
        );
    }

    #[test]
    fn odd_three_backslashes() {
        assert_eq!(normalize_input_line_continuations("a\\\\\\\nb"), "a\\b");
    }

    /// Five backslashes: the last one continues the line, two of the remaining four are kept.
    #[test]
    fn odd_five_backslashes() {
        let input = format!("a{}\nb", "\\".repeat(5));
        assert_eq!(normalize_input_line_continuations(&input), "a\\\\b");
    }

    #[test]
    fn literal_newline_unchanged() {
        assert_eq!(normalize_input_line_continuations("a\nb"), "a\nb");
    }

    #[test]
    fn empty() {
        assert_eq!(normalize_input_line_continuations(""), "");
    }

    #[test]
    fn continuation_strips_spaces_tabs_on_next_line() {
        assert_eq!(
            normalize_input_line_continuations("foo\\\n  \t  bar"),
            "foobar"
        );
    }

    /// A lone `\r` is treated as a line break: it is consumed by a continuation and
    /// passed through otherwise.
    #[test]
    fn lone_cr_is_a_line_break() {
        assert_eq!(normalize_input_line_continuations("foo\\\rbar"), "foobar");
        assert_eq!(normalize_input_line_continuations("a\rb"), "a\rb");
    }

    /// Backslashes at the very end of the input (no line break after them) are not touched.
    #[test]
    fn trailing_backslash_without_line_break_unchanged() {
        assert_eq!(normalize_input_line_continuations("foo\\"), "foo\\");
    }

    #[test]
    fn consecutive_continuations() {
        assert_eq!(normalize_input_line_continuations("a\\\nb\\\nc"), "abc");
        assert_eq!(normalize_input_line_continuations("a\\\n\\\nb"), "ab");
    }

    #[test]
    fn continuation_at_end_of_input() {
        assert_eq!(normalize_input_line_continuations("a\\\n"), "a");
    }

    /// Only the indentation of the next line is stripped; that line's own break survives.
    #[test]
    fn continuation_onto_whitespace_only_line() {
        assert_eq!(normalize_input_line_continuations("x\\\n \t\ny"), "x\ny");
    }

    #[test]
    fn non_ascii_text_preserved() {
        assert_eq!(normalize_input_line_continuations("é\\\n  ü"), "éü");
    }
}