1#[cfg(feature = "memmap2")]
2use memmap2::MmapMut;
3#[cfg(feature = "memmap2")]
4use std::cmp::min;
5
6use crate::prelude::*;
7use std::{
8 cell::Cell,
9 fs::OpenOptions,
10 io::{BufRead, BufReader, Seek, Write},
11 path::Path,
12};
13
14#[cfg(not(feature = "memmap2"))]
15use std::io::Read;
16
/// State needed to build a Markdown table of contents from one open file.
pub struct Taboc {
    /// File handle that `parse` reads line by line through a `BufReader`.
    pub file: std::fs::File,
    /// Flag flipped by `is_in_code_block` every time a line starts with the
    /// code-fence prefix; a `Cell` so it can change behind `&self`.
    code_block: Cell<bool>,
    /// Highest heading level (number of leading `#`) that `valid_heading`
    /// accepts.
    max_depth: usize,
}
34
35impl Taboc {
/// Lowest heading level (number of leading `#`) accepted by `valid_heading`.
const MIN_HEADING: usize = 1;
/// Character whose leading run on a line is counted as the heading level.
const HEADING_CHAR: char = '#';
/// Line prefix that toggles the code-block flag in `is_in_code_block`.
const CODE_BLOCK_STR: &'static str = "```";
/// Heading that opens the generated table of contents; `parse` emits it and
/// skips lines starting with it, `write_to_file` looks for it.
const TOC_HEADING: &'static str = "## Table of contents";
40
41 pub fn new(file: std::fs::File, max_depth: usize) -> Self {
42 Self {
43 file,
44 code_block: Cell::new(false),
45 max_depth,
46 }
47 }
48
/// Percent-encodes `c`: every byte of its UTF-8 encoding becomes an
/// uppercase `%XX` triplet.
///
/// For example `'♊'` (bytes `E2 99 8A`) yields `"%E2%99%8A"`, and the NUL
/// character yields `"%00"`.
fn percent_encode(c: char) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut utf8_bytes = [0u8; 4];
    let bytes = c.encode_utf8(&mut utf8_bytes);
    let mut encoded = String::with_capacity(bytes.len() * 3);

    // Iterate over the encoded slice, not the whole scratch buffer: the
    // slice knows its own length, so no zero byte is needed as a terminator
    // (which would drop U+0000 entirely).
    for byte in bytes.bytes() {
        encoded.push('%');
        encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
    }

    encoded
}
64
65 fn make_link(heading_name: &str) -> String {
67 let mut res = String::with_capacity(heading_name.len());
68
69 const IGNORED_CHARACTERS: &[char] = &[
77 '+', ':', ';', '.', ',', '{', '}', '"', '@', '#', '>', '<', '[', ']', '|', '/', '?',
78 '!', '$', '*', '=', '&', '\'', '(', ')', '~',
79 ];
80
81 for c in heading_name.chars() {
82 if IGNORED_CHARACTERS.contains(&c) {
83 continue;
84 }
85
86 if c == ' ' {
87 res.push('-');
88 continue;
89 }
90
91 if c.is_ascii() {
92 res.push(c.to_ascii_lowercase());
93 continue;
94 }
95
96 if c.is_uppercase() {
97 res.push_str(&c.to_lowercase().to_string());
98 continue;
99 }
100
101 if !c.is_alphanumeric() {
102 res.push_str(&Self::percent_encode(c));
103 continue;
104 }
105
106 res.push(c);
107 }
108
109 res
110 }
111
112 fn make_line(heading_level: usize, line: &str) -> String {
114 format!(
115 "{}- [{}](#{})\n",
116 " ".repeat(heading_level - 1),
117 line,
118 Self::make_link(line)
119 )
120 }
121
122 fn valid_heading(&self, heading_level: usize, line: &str) -> bool {
124 if !(Self::MIN_HEADING..=self.max_depth).contains(&heading_level) {
125 return false;
126 }
127 if line.len() <= heading_level || line.chars().nth(heading_level) != Some(' ') {
128 return false;
129 }
130 if line.chars().nth(heading_level + 1).is_none() {
131 return false;
132 }
133 true
134 }
135
136 fn is_in_code_block(&self, line: &str) -> bool {
138 if line.starts_with(Self::CODE_BLOCK_STR) {
139 self.code_block.replace(!self.code_block.get());
140 }
141 self.code_block.get()
142 }
143
144 pub fn parse(&self) -> Result<String, Error> {
146 let mut res = format!("\n\n{}\n\n", Self::TOC_HEADING);
147
148 for l in BufReader::new(&self.file).lines() {
149 let line = l?;
150
151 if self.is_in_code_block(&line) {
152 continue;
153 }
154
155 let heading_count = line
156 .chars()
157 .take_while(|c| *c == Self::HEADING_CHAR)
158 .count();
159
160 if !self.valid_heading(heading_count, &line) {
161 continue;
162 }
163
164 if line.starts_with(Self::TOC_HEADING) {
165 continue;
166 }
167
168 let heading = line
169 .chars()
170 .skip(heading_count)
171 .skip_while(|c| c.is_whitespace())
172 .collect::<String>();
173
174 res.push_str(&Self::make_line(heading_count, &heading));
175 }
176
177 res.pop();
179
180 Ok(res)
181 }
182
183 pub fn write_to_file<P: AsRef<Path>>(
188 &self,
189 path: P,
190 input: &str,
191 update_existing: bool,
192 ) -> Result<(), Error> {
193 let mut target_file = OpenOptions::new().read(true).write(true).open(path)?;
194
195 let mut pos = 0;
196 let lookup_header = "## ";
197 let mut line_buf = Vec::new();
198 let mut reader = BufReader::new(&target_file);
199
200 let mut already_exists = false;
201
202 while let Ok(char_count) = reader.read_until(b'\n', &mut line_buf) {
203 if char_count == 0 {
204 break;
205 }
206
207 if line_buf.starts_with(lookup_header.as_bytes()) {
208 let windows_toc = line_buf[line_buf.len().saturating_sub(2)] != b'\r'
209 && &line_buf[0..line_buf.len().saturating_sub(1)]
210 == Self::TOC_HEADING.as_bytes();
211 let unix_toc = &line_buf[0..line_buf.len()] == Self::TOC_HEADING.as_bytes();
212 if !update_existing && (windows_toc || unix_toc) {
213 return Err(
214 anyhow!("There's already a table of contents in the first heading of the second level of this file.")
215 );
216 } else if windows_toc || unix_toc {
217 already_exists = true;
218 }
219 pos -= lookup_header.len() as u64 - 1;
221 break;
222 }
223
224 pos += char_count as u64;
225
226 line_buf.clear();
227 }
228
229 target_file.seek(std::io::SeekFrom::Start(pos))?;
230 #[cfg(feature = "memmap2")]
231 let rest_map = unsafe { MmapMut::map_mut(&target_file)? };
232 #[cfg(feature = "memmap2")]
233 let mut rest = &rest_map[..];
234 #[cfg(not(feature = "memmap2"))]
235 let mut rest = Vec::<u8>::new();
236 #[cfg(not(feature = "memmap2"))]
237 target_file.read_to_end(&mut rest)?;
238
239 target_file.seek(std::io::SeekFrom::Start(pos))?;
240
241 if already_exists {
242 let mut reader = BufReader::new(&target_file);
243 let mut drain_pos = 0;
244
245 let mut end_heading_count = 3;
247 let mut last_line_char_count = 0;
248
249 while let Ok(char_count) = reader.read_until(b'\n', &mut line_buf) {
250 if line_buf.starts_with(b"#") {
253 end_heading_count -= 1;
254 } else if end_heading_count == 0 {
255 drain_pos -= last_line_char_count;
256 break;
257 }
258 if line_buf.trim_ascii().is_empty() && end_heading_count == 1 {
259 drain_pos -= char_count;
260 }
261
262 drain_pos += char_count;
263
264 line_buf.clear();
265 last_line_char_count = char_count;
266 }
267
268 #[cfg(feature = "memmap2")]
269 {
270 rest = &rest_map[min(drain_pos - 1, rest_map.len() - 1)..];
271 }
272 #[cfg(not(feature = "memmap2"))]
273 rest.drain(..drain_pos);
274 }
275
276 target_file.seek(std::io::SeekFrom::Start(pos))?;
277 target_file.write_all(input.as_bytes())?;
278 #[cfg(feature = "memmap2")]
279 target_file.write_all(rest)?;
280 #[cfg(feature = "memmap2")]
281 rest_map.flush()?;
282 #[cfg(not(feature = "memmap2"))]
283 target_file.write_all(&rest)?;
284
285 Ok(())
286 }
287}
288
#[cfg(test)]
mod tests {
    use super::Taboc;

    /// Each character must come out as the `%XX` form of its UTF-8 bytes.
    #[test]
    fn percent_encode() {
        let cases = [
            ('😁', "%F0%9F%98%81"),
            ('♊', "%E2%99%8A"),
            ('⏳', "%E2%8F%B3"),
            ('❌', "%E2%9D%8C"),
            ('⏪', "%E2%8F%AA"),
            ('⛪', "%E2%9B%AA"),
            ('⟣', "%E2%9F%A3"),
            ('⛟', "%E2%9B%9F"),
        ];

        for (input, expected) in cases {
            assert_eq!(Taboc::percent_encode(input), expected);
        }
    }
}