mdbook_all_the_markdowns/
finder.rs

1use inflector::Inflector;
2use std::path::PathBuf;
3use std::{fs, io};
4use walkdir::{DirEntry, WalkDir};
5
/// Upper bound on how many nested path components (folders plus the file itself) below the root
/// an entry may have. It sizes the per-depth section bookkeeping in `find_markdown_files`, which
/// panics once an entry reaches this depth.
const MAX_FOLDER_DEPTH: usize = 24;
8
/// A single entry of the generated index: either a markdown file or a folder that leads to one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownFile {
    /// Last component of the path (file or folder name), lossily converted to UTF-8.
    pub name: String,
    /// The path of the entry as a string, including the root it was discovered under.
    pub filename: String,
    /// Section number of the entry, one element per nesting level (e.g. `[1, 2]` for `1.2`).
    pub section: Vec<u32>,
    /// `true` when the entry is a directory rather than a markdown file.
    pub is_folder: bool,
}
16
17/// ignore_matches will be used as a filter when walking down the folder structure by iterating
18/// over all the directories added as ignore.
19fn ignore_matches(ignore_patterns: Vec<String>) -> impl Fn(&DirEntry) -> bool {
20    move |entry: &DirEntry| -> bool {
21        !entry
22            .path()
23            .to_str()
24            .map(|s| {
25                for i in &ignore_patterns {
26                    if s.starts_with(i.as_str()) {
27                        return true;
28                    }
29                }
30
31                false
32            })
33            .unwrap_or(false)
34            && !is_hidden(entry)
35    }
36}
37
38/// is_hidden returns true if the file is a hidden file. We use it to ignore these files when
39/// looking for markdown files.
40fn is_hidden(entry: &DirEntry) -> bool {
41    entry
42        .file_name()
43        .to_str()
44        .map(|s| s.starts_with("."))
45        .unwrap_or(false)
46}
47
48/// Find all markdown files by iterating from the `root` and store all folders and markdown files
49/// in a list to determine what to render.
50pub fn find_markdown_files(root: String, ignore: Vec<String>) -> Vec<MarkdownFile> {
51    let mut filenames = vec![];
52
53    for entry in WalkDir::new(&root)
54        .follow_links(false)
55        .into_iter()
56        .filter_entry(ignore_matches(ignore))
57        .filter_map(|e| e.ok())
58    {
59        // Skip directories, we'll resolve them once we found all markdown files.
60        if entry.path().is_dir() {
61            continue;
62        }
63
64        // Skip starting directory.
65        if let Some(path) = entry.path().to_str() {
66            if path == root {
67                continue;
68            }
69        }
70
71        // A file is found but it's not a markdown file, move on.
72        if let Some(ex) = entry.path().extension() {
73            if ex != "md" {
74                continue;
75            }
76        } else {
77            // Not even a file extension? Skip it!
78            continue;
79        }
80
81        //  Add markdown file to filenames.
82        filenames.push(entry.path().to_path_buf());
83    }
84
85    // We want to add all folders leading up to a markdown file but so we create a set of all
86    // parent paths leading up to each markdown file. This way we can omit empty directories or
87    // paths not leading to any markdown file. In the end we want to mark each section of a path as
88    // a (sub) chapter.
89    // F.ex. the file foo/bar/README.md should have section 1.1.1 where foo is 1 and
90    // bar is 1.1.
91    let mut parents = std::collections::HashSet::new();
92    let mut folder_has_readme_md = std::collections::HashSet::new();
93
94    for path in filenames.iter() {
95        let mut path_buf = PathBuf::new();
96
97        for c in path.components() {
98            path_buf = path_buf.join(c);
99
100            // Don't add any paths until we've passed the root.
101            if path_buf.to_string_lossy().len() <= root.len() {
102                continue;
103            }
104
105            if let Some(ex) = path_buf.extension() {
106                if ex == "md" {
107                    if path_buf.ends_with("README.md") {
108                        folder_has_readme_md.insert(path.parent().unwrap().to_path_buf());
109                    }
110
111                    break;
112                }
113            }
114
115            parents.insert(path_buf.clone());
116        }
117    }
118
119    for parent in parents {
120        if !folder_has_readme_md.contains(&parent) {
121            filenames.push(parent);
122        }
123    }
124
125    // Sort the file names to get deterministic order of the index.
126    filenames.sort_by(|a, b| {
127        let a_parent = a.parent().unwrap();
128        let b_parent = b.parent().unwrap();
129
130        // If the paths are not the same use regular alphanumeric sorting.
131        if a_parent != b_parent {
132            return a.partial_cmp(b).unwrap();
133        }
134
135        // If one of them is the parent dir itself continue with regular sorting.
136        if a.is_dir() || b.is_dir() {
137            return a.partial_cmp(b).unwrap();
138        }
139
140        // If the paths are the same, ensure we sort README.md first so the section number is
141        // correct even if there are files that would be sorted alphanumerically before.
142        // For example; if we have two files /foo/README.md and /foo/INSTALLATION.md we want to
143        // sort README.md first because it will automatically get assigned section number [1] where
144        // INSTALLATION.md would be [1, 1].
145        match a.file_name() {
146            Some(v) if v == "README.md" => std::cmp::Ordering::Less,
147            _ => std::cmp::Ordering::Greater,
148        }
149    });
150
151    let mut sections: Vec<String> = vec!["".into(); MAX_FOLDER_DEPTH];
152    let mut section_ids = vec![0; MAX_FOLDER_DEPTH];
153    let mut markdowns: Vec<MarkdownFile> = vec![];
154
155    for path in filenames.iter() {
156        let mut sections_for_file = 0;
157        let is_folder = path.is_dir();
158        let path_witout_prefix = match path.strip_prefix(&root) {
159            Ok(v) => v,
160            Err(_) => continue,
161        };
162
163        for (i, c) in path_witout_prefix.components().enumerate() {
164            let section_name = c.as_os_str().to_str().unwrap();
165
166            // If this is README.md, don't increment any IDs, treat this as the folder.
167            if section_name == "README.md" {
168                continue;
169            }
170
171            // If the section is new, increment the ID.
172            if sections[i] != section_name || section_name.ends_with(".md") {
173                sections[i] = section_name.to_string();
174                section_ids[i] += 1;
175
176                // If we update index i to a new ID we must reset whatever comes after to
177                // restart counting at 0 and mark every sub folder as not seen.
178                let reset_vec = vec![0; MAX_FOLDER_DEPTH - i];
179                section_ids = section_ids[0..i + 1].to_vec();
180                section_ids.extend(reset_vec);
181
182                let reset_vec_sections = vec!["".into(); MAX_FOLDER_DEPTH - i];
183                sections = sections[0..i + 1].to_vec();
184                sections.extend(reset_vec_sections);
185            }
186
187            // Increment new sections seen to know how many sub sections to add for the current
188            // file.
189            sections_for_file += 1;
190            if sections_for_file >= MAX_FOLDER_DEPTH {
191                panic!("too deep folder structure - not supported!");
192            }
193        }
194
195        markdowns.push(MarkdownFile {
196            name: path.file_name().unwrap().to_string_lossy().to_string(),
197            filename: path.as_os_str().to_str().unwrap().to_string(),
198            section: section_ids[0..sections_for_file].to_vec(),
199            is_folder,
200        });
201    }
202
203    markdowns
204}
205
206impl MarkdownFile {
207    /// Get the title and the content from a markdown file. If the markdown file is actually a
208    /// folder, title case the folder name after replacing `_` and `-` with a space.
209    pub fn content(&self) -> io::Result<(String, String)> {
210        if self.is_folder {
211            // Seems good enough: https://stackoverflow.com/a/27086669/2274551
212            let title = self
213                .name
214                .replace("-", " ")
215                .replace("_", " ")
216                .to_title_case();
217            let content = format!("# {}", title);
218
219            return Ok((title, content));
220        }
221
222        let contents = fs::read_to_string(self.filename.clone())?;
223        let raw_title = contents
224            .lines()
225            .filter(|l| !l.is_empty())
226            .next()
227            .ok_or(std::io::ErrorKind::InvalidData)?;
228
229        let re = regex::Regex::new(r"^#+\s*").unwrap();
230        let title = re.replace_all(raw_title, "").to_string();
231
232        Ok((title, contents))
233    }
234}