uuhelp_parser/
lib.rs

1// This file is part of the uutils coreutils package.
2//
3// For the full copyright and license information, please view the LICENSE
4// file that was distributed with this source code.
5#![deny(missing_docs)]
6
7//! A collection of functions to parse the markdown code of help files.
8//!
9//! The structure of the markdown code is assumed to be:
10//!
11//! # util name
12//!
13//! ```text
14//! usage info
15//! ```
16//!
17//! About text
18//!
19//! ## Section 1
20//!
21//! Some content
22//!
23//! ## Section 2
24//!
25//! Some content
26
27const MARKDOWN_CODE_FENCES: &str = "```";
28
29/// Parses the text between the first markdown code block and the next header, if any,
30/// into an about string.
31pub fn parse_about(content: &str) -> String {
32    content
33        .lines()
34        .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
35        .skip(1)
36        .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
37        .skip(1)
38        .take_while(|l| !l.starts_with('#'))
39        .collect::<Vec<_>>()
40        .join("\n")
41        .trim()
42        .to_string()
43}
44
45/// Parses the first markdown code block into a usage string
46///
47/// The code fences are removed and the name of the util is replaced
48/// with `{}` so that it can be replaced with the appropriate name
49/// at runtime.
50pub fn parse_usage(content: &str) -> String {
51    content
52        .lines()
53        .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
54        .skip(1)
55        .take_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
56        .map(|l| {
57            // Replace the util name (assumed to be the first word) with "{}"
58            // to be replaced with the runtime value later.
59            if let Some((_util, args)) = l.split_once(' ') {
60                format!("{{}} {args}\n")
61            } else {
62                "{}\n".to_string()
63            }
64        })
65        .collect::<String>()
66        .trim()
67        .to_string()
68}
69
/// Get a single section from content
///
/// The section must be a second level section (i.e. start with `##`).
/// The header text is compared case-insensitively (both sides are lowercased)
/// after trimming whitespace around the header title.
///
/// Returns `None` if no matching header exists. Otherwise returns the lines
/// following the header, up to (but not including) the next line starting
/// with `## `, joined with `\n` and trimmed. An existing but empty section
/// yields `Some` of an empty string.
pub fn parse_section(section: &str, content: &str) -> Option<String> {
    // Prefix includes space to allow processing of section with level 3-6 headers
    const NEXT_SECTION_PREFIX: &str = "## ";

    let wanted = section.to_lowercase();
    let is_wanted_header = |line: &str| {
        line.strip_prefix("##")
            .is_some_and(|title| title.trim().to_lowercase() == wanted)
    };

    let mut lines = content.lines();

    // `find` consumes the iterator up to and including the header line, so a
    // single pass both detects a missing section (`?` returns `None`) and
    // positions `lines` at the first line of the section body.
    lines.find(|&line| is_wanted_header(line))?;

    Some(
        lines
            .take_while(|line| !line.starts_with(NEXT_SECTION_PREFIX))
            .collect::<Vec<_>>()
            .join("\n")
            .trim()
            .to_string(),
    )
}
102
#[cfg(test)]
mod tests {
    use super::*;

    // Every fixture line ends with `\n\` so that the literal contains exactly
    // the markdown text, without any of the source file's indentation.

    #[test]
    fn test_parse_section() {
        let input = "\
            # ls\n\
            ## some section\n\
            This is some section\n\
            \n\
            ## ANOTHER SECTION\n\
            This is the other section\n\
            with multiple lines\n";

        assert_eq!(
            parse_section("some section", input).unwrap(),
            "This is some section"
        );
        assert_eq!(
            parse_section("SOME SECTION", input).unwrap(),
            "This is some section"
        );
        assert_eq!(
            parse_section("another section", input).unwrap(),
            "This is the other section\nwith multiple lines"
        );
    }

    #[test]
    fn test_parse_section_with_sub_headers() {
        let input = "\
            # ls\n\
            ## after section\n\
            This is some section\n\
            \n\
            ### level 3 header\n\
            \n\
            Additional text under the section.\n\
            \n\
            #### level 4 header\n\
            \n\
            Yet another paragraph\n";

        assert_eq!(
            parse_section("after section", input).unwrap(),
            "This is some section\n\n\
            ### level 3 header\n\n\
            Additional text under the section.\n\n\
            #### level 4 header\n\n\
            Yet another paragraph"
        );

        // Only second level headers can be requested as a section.
        assert!(parse_section("level 3 header", input).is_none());
    }

    #[test]
    fn test_parse_non_existing_section() {
        let input = "\
            # ls\n\
            ## some section\n\
            This is some section\n\
            \n\
            ## ANOTHER SECTION\n\
            This is the other section\n\
            with multiple lines\n";

        assert!(parse_section("non-existing section", input).is_none());
    }

    #[test]
    fn test_parse_empty_section() {
        let input = "\
            # ls\n\
            ## empty section\n\
            \n\
            ## some section\n\
            This is some section\n";

        // An existing section without content is distinguishable from a
        // missing one.
        assert_eq!(parse_section("empty section", input).unwrap(), "");
    }

    #[test]
    fn test_parse_usage() {
        let input = "\
            # ls\n\
            ```\n\
            ls -l\n\
            ```\n\
            ## some section\n\
            This is some section\n\
            \n\
            ## ANOTHER SECTION\n\
            This is the other section\n\
            with multiple lines\n";

        assert_eq!(parse_usage(input), "{} -l");
    }

    #[test]
    fn test_parse_multi_line_usage() {
        let input = "\
            # ls\n\
            ```\n\
            ls -a\n\
            ls -b\n\
            ls -c\n\
            ```\n\
            ## some section\n\
            This is some section\n";

        assert_eq!(parse_usage(input), "{} -a\n{} -b\n{} -c");
    }

    #[test]
    fn test_parse_usage_without_code_block() {
        let input = "\
            # ls\n\
            ## some section\n\
            This is some section\n";

        assert_eq!(parse_usage(input), "");
    }

    #[test]
    fn test_parse_about() {
        let input = "\
            # ls\n\
            ```\n\
            ls -l\n\
            ```\n\
            \n\
            This is the about section\n\
            \n\
            ## some section\n\
            This is some section\n";

        assert_eq!(parse_about(input), "This is the about section");
    }

    #[test]
    fn test_parse_multi_line_about() {
        let input = "\
            # ls\n\
            ```\n\
            ls -l\n\
            ```\n\
            \n\
            about a\n\
            \n\
            about b\n\
            \n\
            ## some section\n\
            This is some section\n";

        assert_eq!(parse_about(input), "about a\n\nabout b");
    }

    #[test]
    fn test_parse_about_without_code_block() {
        let input = "\
            # ls\n\
            \n\
            This text is not preceded by a usage block\n";

        assert_eq!(parse_about(input), "");
    }
}