1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

//! A collection of functions to parse the markdown code of help files.
//!
//! The structure of the markdown code is assumed to be:
//!
//! # util name
//!
//! ```text
//! usage info
//! ```
//!
//! About text
//!
//! ## Section 1
//!
//! Some content
//!
//! ## Section 2
//!
//! Some content

/// Marker that opens and closes a fenced markdown code block.
const MARKDOWN_CODE_FENCES: &str = "```";

/// Extracts the "about" text of a help file.
///
/// The about text is everything after the closing fence of the first markdown
/// code block, up to (but excluding) the next line that starts with `#`.
/// Leading and trailing whitespace is trimmed. If `content` does not contain
/// a complete fenced code block, the returned string is empty.
pub fn parse_about(content: &str) -> String {
    let is_fence = |line: &str| line.starts_with(MARKDOWN_CODE_FENCES);
    let mut lines = content.lines();

    // `find` also consumes the line it matches, so after these two calls the
    // iterator is positioned right behind the closing fence (or is exhausted
    // when a fence is missing).
    let _opening_fence = lines.find(|&line| is_fence(line));
    let _closing_fence = lines.find(|&line| is_fence(line));

    let about: Vec<&str> = lines.take_while(|line| !line.starts_with('#')).collect();
    about.join("\n").trim().to_string()
}

/// Builds the usage string from the first markdown code block.
///
/// The fence lines themselves are dropped. On every line inside the block the
/// first word (assumed to be the util name) is replaced with `{}` so that the
/// actual name can be substituted at runtime. Lines are separated by `\n` and
/// the final result is trimmed.
pub fn parse_usage(content: &str) -> String {
    let mut lines = content.lines();

    // Advance past the opening fence; `find` consumes the fence line itself.
    let _opening_fence = lines.find(|line| line.starts_with(MARKDOWN_CODE_FENCES));

    let mut usage = String::new();
    for line in lines.take_while(|line| !line.starts_with(MARKDOWN_CODE_FENCES)) {
        // The placeholder stands in for the util name, i.e. everything before
        // the first space (or the whole line when there is no space).
        usage.push_str("{}");
        if let Some((_util, args)) = line.split_once(' ') {
            usage.push(' ');
            usage.push_str(args);
        }
        usage.push('\n');
    }

    usage.trim().to_string()
}

/// Extracts the body of a single section from `content`.
///
/// The section must be introduced by a second level header (i.e. a line
/// starting with `##`). The header title is compared case-insensitively and
/// with surrounding whitespace ignored. The body runs until the next line
/// starting with `## ` or the end of `content`, and is returned trimmed.
///
/// Returns `None` if no matching header exists; an existing but empty section
/// yields `Some` of an empty string.
pub fn parse_section(section: &str, content: &str) -> Option<String> {
    let wanted = section.to_lowercase();
    let is_wanted_header = |line: &str| match line.strip_prefix("##") {
        Some(title) => title.trim().to_lowercase() == wanted,
        None => false,
    };

    let mut lines = content.lines();

    // `find` consumes everything up to and including the header line and
    // tells us whether the section exists at all, which lets us distinguish
    // a missing section (`None`) from an empty one.
    lines.find(|&line| is_wanted_header(line))?;

    // The terminating prefix includes the space so that level 3-6 headers
    // stay part of the section body.
    let body: Vec<&str> = lines
        .take_while(|line| !line.starts_with("## "))
        .collect();

    Some(body.join("\n").trim().to_string())
}

/// Unit tests for the help-file markdown parsers.
///
/// All fixtures are written as a single string literal using `\n\` at the end
/// of each source line: the `\n` is the newline of the markdown input and the
/// trailing backslash makes the compiler skip the source newline and the
/// indentation of the following line.
#[cfg(test)]
mod tests {
    use super::*;

    /// Sections are found regardless of the case of the requested title or
    /// of the header in the markdown.
    #[test]
    fn test_parse_section() {
        let input = "\
            # ls\n\
            ## some section\n\
            This is some section\n\
            \n\
            ## ANOTHER SECTION\n\
            This is the other section\n\
            with multiple lines\n";

        assert_eq!(
            parse_section("some section", input).unwrap(),
            "This is some section"
        );
        assert_eq!(
            parse_section("SOME SECTION", input).unwrap(),
            "This is some section"
        );
        assert_eq!(
            parse_section("another section", input).unwrap(),
            "This is the other section\nwith multiple lines"
        );
    }

    /// Headers of level 3 and deeper belong to the enclosing section.
    #[test]
    fn test_parse_section_with_sub_headers() {
        let input = "\
            # ls\n\
            ## after section\n\
            This is some section\n\
            \n\
            ### level 3 header\n\
            \n\
            Additional text under the section.\n\
            \n\
            #### level 4 header\n\
            \n\
            Yet another paragraph\n";

        assert_eq!(
            parse_section("after section", input).unwrap(),
            "This is some section\n\n\
            ### level 3 header\n\n\
            Additional text under the section.\n\n\
            #### level 4 header\n\n\
            Yet another paragraph"
        );
    }

    /// Requesting a section that does not exist yields `None`.
    #[test]
    fn test_parse_non_existing_section() {
        let input = "\
            # ls\n\
            ## some section\n\
            This is some section\n\
            \n\
            ## ANOTHER SECTION\n\
            This is the other section\n\
            with multiple lines\n";

        assert!(parse_section("non-existing section", input).is_none());
    }

    /// A section that exists but has no content yields an empty string,
    /// not `None`.
    #[test]
    fn test_parse_empty_section() {
        let input = "\
            # ls\n\
            ## empty section\n\
            ## some section\n\
            This is some section\n";

        assert_eq!(parse_section("empty section", input).unwrap(), "");
    }

    /// The util name in the usage block is replaced with `{}`.
    #[test]
    fn test_parse_usage() {
        let input = "\
            # ls\n\
            ```\n\
            ls -l\n\
            ```\n\
            ## some section\n\
            This is some section\n\
            \n\
            ## ANOTHER SECTION\n\
            This is the other section\n\
            with multiple lines\n";

        assert_eq!(parse_usage(input), "{} -l");
    }

    /// Every line of a multi-line usage block gets its own placeholder.
    #[test]
    fn test_parse_multi_line_usage() {
        let input = "\
            # ls\n\
            ```\n\
            ls -a\n\
            ls -b\n\
            ls -c\n\
            ```\n\
            ## some section\n\
            This is some section\n";

        assert_eq!(parse_usage(input), "{} -a\n{} -b\n{} -c");
    }

    /// A usage line consisting only of the util name becomes a bare `{}`.
    #[test]
    fn test_parse_usage_without_arguments() {
        let input = "\
            # true\n\
            ```\n\
            true\n\
            ```\n";

        assert_eq!(parse_usage(input), "{}");
    }

    /// The about text is what follows the usage block, up to the first header.
    #[test]
    fn test_parse_about() {
        let input = "\
            # ls\n\
            ```\n\
            ls -l\n\
            ```\n\
            \n\
            This is the about section\n\
            \n\
            ## some section\n\
            This is some section\n";

        assert_eq!(parse_about(input), "This is the about section");
    }

    /// Blank lines between about paragraphs are preserved.
    #[test]
    fn test_parse_multi_line_about() {
        let input = "\
            # ls\n\
            ```\n\
            ls -l\n\
            ```\n\
            \n\
            about a\n\
            \n\
            about b\n\
            \n\
            ## some section\n\
            This is some section\n";

        assert_eq!(parse_about(input), "about a\n\nabout b");
    }
}