mdbook/utils/
string.rs

1use regex::Regex;
2use std::ops::Bound::{Excluded, Included, Unbounded};
3use std::ops::RangeBounds;
4use std::sync::LazyLock;
5
/// Take a range of lines from a string.
///
/// Line indices are zero-based and lines are split with [`str::lines`]; the selected lines are
/// joined back together with `\n`. A range that reaches past the last line simply stops at the
/// end of the input, and an empty or reversed range (for example `4..3`) yields an empty string.
///
/// Bounds at `usize::MAX` (such as `..=usize::MAX`) are handled without overflowing.
pub fn take_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String {
    // Index of the first selected line. `saturating_add` keeps an exclusive start bound of
    // `usize::MAX` from overflowing; nothing can be selected in that case anyway.
    let start = match range.start_bound() {
        Excluded(&n) => n.saturating_add(1),
        Included(&n) => n,
        Unbounded => 0,
    };
    // Index one past the last selected line, or `None` when the range is open-ended.
    let end = match range.end_bound() {
        Excluded(&n) => Some(n),
        Included(&n) => Some(n.saturating_add(1)),
        Unbounded => None,
    };
    // Number of lines to keep after skipping `start`; a reversed range saturates to zero.
    let count = end.map_or(usize::MAX, |end| end.saturating_sub(start));

    s.lines()
        .skip(start)
        .take(count)
        .collect::<Vec<_>>()
        .join("\n")
}
26
27static ANCHOR_START: LazyLock<Regex> =
28    LazyLock::new(|| Regex::new(r"ANCHOR:\s*(?P<anchor_name>[\w_-]+)").unwrap());
29static ANCHOR_END: LazyLock<Regex> =
30    LazyLock::new(|| Regex::new(r"ANCHOR_END:\s*(?P<anchor_name>[\w_-]+)").unwrap());
31
32/// Take anchored lines from a string.
33/// Lines containing anchor are ignored.
34pub fn take_anchored_lines(s: &str, anchor: &str) -> String {
35    let mut retained = Vec::<&str>::new();
36    let mut anchor_found = false;
37
38    for l in s.lines() {
39        if anchor_found {
40            match ANCHOR_END.captures(l) {
41                Some(cap) => {
42                    if &cap["anchor_name"] == anchor {
43                        break;
44                    }
45                }
46                None => {
47                    if !ANCHOR_START.is_match(l) {
48                        retained.push(l);
49                    }
50                }
51            }
52        } else if let Some(cap) = ANCHOR_START.captures(l) {
53            if &cap["anchor_name"] == anchor {
54                anchor_found = true;
55            }
56        }
57    }
58
59    retained.join("\n")
60}
61
/// Emit every line of `s`, leaving the lines inside `range` untouched.
///
/// Lines whose zero-based index falls outside `range` are still emitted, but prefixed with
/// `# `. This hides them from the initial display while keeping them available when the code
/// snippet is expanded or tested with rustdoc. Lines are joined with `\n`.
pub fn take_rustdoc_include_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String {
    let mut rendered = String::with_capacity(s.len());

    for (line_no, text) in s.lines().enumerate() {
        // Separate from the previous line instead of trimming a trailing newline afterwards.
        if line_no > 0 {
            rendered.push('\n');
        }
        if !range.contains(&line_no) {
            rendered.push_str("# ");
        }
        rendered.push_str(text);
    }

    rendered
}
79
80/// Keep lines between the anchor comments specified as-is.
81/// For any lines not between the anchors, include them but use `#` at the beginning. This will
82/// hide the lines from initial display but include them when expanding the code snippet or testing
83/// with rustdoc.
84pub fn take_rustdoc_include_anchored_lines(s: &str, anchor: &str) -> String {
85    let mut output = String::with_capacity(s.len());
86    let mut within_anchored_section = false;
87
88    for l in s.lines() {
89        if within_anchored_section {
90            match ANCHOR_END.captures(l) {
91                Some(cap) => {
92                    if &cap["anchor_name"] == anchor {
93                        within_anchored_section = false;
94                    }
95                }
96                None => {
97                    if !ANCHOR_START.is_match(l) {
98                        output.push_str(l);
99                        output.push('\n');
100                    }
101                }
102            }
103        } else if let Some(cap) = ANCHOR_START.captures(l) {
104            if &cap["anchor_name"] == anchor {
105                within_anchored_section = true;
106            }
107        } else if !ANCHOR_END.is_match(l) {
108            output.push_str("# ");
109            output.push_str(l);
110            output.push('\n');
111        }
112    }
113
114    output.pop();
115    output
116}
117
#[cfg(test)]
mod tests {
    use super::{
        take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
        take_rustdoc_include_lines,
    };

    /// Five lines without any anchor marker.
    const PLAIN: &str = "Lorem\nipsum\ndolor\nsit\namet";
    /// Closing marker only.
    const END_ONLY: &str = "Lorem\nipsum\ndolor\nANCHOR_END: test\nsit\namet";
    /// Opening marker that is never closed.
    const START_ONLY: &str = "Lorem\nipsum\nANCHOR: test\ndolor\nsit\namet";
    /// One properly closed region.
    const CLOSED: &str =
        "Lorem\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nipsum";
    /// The same anchor opened twice before being closed once.
    const DOUBLE_START: &str =
        "Lorem\nANCHOR: test\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nipsum";
    /// Region `test` nested inside region `test2`, with varying whitespace after the colon.
    const NESTED: &str = "Lorem\nANCHOR:    test2\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nANCHOR_END:test2\nipsum";
    /// The same anchor opened and closed twice.
    const REPEATED: &str = "Lorem\nANCHOR: test\nipsum\nANCHOR_END: test\ndolor\nANCHOR: test\nsit\nANCHOR_END: test\namet";

    #[test]
    #[allow(clippy::reversed_empty_ranges)] // Intentionally checking that those are correctly handled
    fn take_lines_test() {
        assert_eq!(take_lines(PLAIN, 1..3), "ipsum\ndolor");
        assert_eq!(take_lines(PLAIN, 3..), "sit\namet");
        assert_eq!(take_lines(PLAIN, ..3), "Lorem\nipsum\ndolor");
        assert_eq!(take_lines(PLAIN, ..), PLAIN);
        // corner cases
        assert_eq!(take_lines(PLAIN, 4..3), "");
        assert_eq!(take_lines(PLAIN, ..100), PLAIN);
    }

    #[test]
    fn take_anchored_lines_test() {
        // (input, anchor, expected output)
        let cases = [
            (PLAIN, "test", ""),
            (END_ONLY, "test", ""),
            (START_ONLY, "test", "dolor\nsit\namet"),
            (START_ONLY, "something", ""),
            (CLOSED, "test", "dolor\nsit\namet"),
            (CLOSED, "something", ""),
            (DOUBLE_START, "test", "ipsum\ndolor\nsit\namet"),
            (DOUBLE_START, "something", ""),
            (NESTED, "test2", "ipsum\ndolor\nsit\namet\nlorem"),
            (NESTED, "test", "dolor\nsit\namet"),
            (NESTED, "something", ""),
        ];

        for (input, anchor, expected) in cases {
            assert_eq!(
                take_anchored_lines(input, anchor),
                expected,
                "anchor {anchor:?} in {input:?}"
            );
        }
    }

    #[test]
    #[allow(clippy::reversed_empty_ranges)] // Intentionally checking that those are correctly handled
    fn take_rustdoc_include_lines_test() {
        let all_hidden = "# Lorem\n# ipsum\n# dolor\n# sit\n# amet";

        assert_eq!(
            take_rustdoc_include_lines(PLAIN, 1..3),
            "# Lorem\nipsum\ndolor\n# sit\n# amet"
        );
        assert_eq!(
            take_rustdoc_include_lines(PLAIN, 3..),
            "# Lorem\n# ipsum\n# dolor\nsit\namet"
        );
        assert_eq!(
            take_rustdoc_include_lines(PLAIN, ..3),
            "Lorem\nipsum\ndolor\n# sit\n# amet"
        );
        assert_eq!(take_rustdoc_include_lines(PLAIN, ..), PLAIN);
        // corner cases
        assert_eq!(take_rustdoc_include_lines(PLAIN, 4..3), all_hidden);
        assert_eq!(take_rustdoc_include_lines(PLAIN, ..100), PLAIN);
    }

    #[test]
    fn take_rustdoc_include_anchored_lines_test() {
        // Expected output when no line of the short / long fixtures is inside the anchor.
        let short_hidden = "# Lorem\n# ipsum\n# dolor\n# sit\n# amet";
        let long_hidden = "# Lorem\n# ipsum\n# dolor\n# sit\n# amet\n# lorem\n# ipsum";

        // (input, anchor, expected output)
        let cases = [
            (PLAIN, "test", short_hidden),
            (END_ONLY, "test", short_hidden),
            (START_ONLY, "test", "# Lorem\n# ipsum\ndolor\nsit\namet"),
            (START_ONLY, "something", short_hidden),
            (
                CLOSED,
                "test",
                "# Lorem\n# ipsum\ndolor\nsit\namet\n# lorem\n# ipsum",
            ),
            (CLOSED, "something", long_hidden),
            (
                DOUBLE_START,
                "test",
                "# Lorem\nipsum\ndolor\nsit\namet\n# lorem\n# ipsum",
            ),
            (DOUBLE_START, "something", long_hidden),
            (
                NESTED,
                "test2",
                "# Lorem\nipsum\ndolor\nsit\namet\nlorem\n# ipsum",
            ),
            (
                NESTED,
                "test",
                "# Lorem\n# ipsum\ndolor\nsit\namet\n# lorem\n# ipsum",
            ),
            (NESTED, "something", long_hidden),
            (REPEATED, "test", "# Lorem\nipsum\n# dolor\nsit\n# amet"),
        ];

        for (input, anchor, expected) in cases {
            assert_eq!(
                take_rustdoc_include_anchored_lines(input, anchor),
                expected,
                "anchor {anchor:?} in {input:?}"
            );
        }
    }
}