mdbook_toc/
lib.rs

1use std::cmp::Ordering;
2use std::collections::HashMap;
3use std::fmt::Write;
4
5use mdbook_preprocessor::book::{Book, BookItem, Chapter};
6use mdbook_preprocessor::errors::Result;
7use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
8use pulldown_cmark::{Event, Options, Parser};
9use pulldown_cmark::{Tag::*, TagEnd};
10
/// The table-of-contents preprocessor.
///
/// A stateless unit type: all behavior lives in its [`Preprocessor`]
/// implementation and in [`Toc::add_toc`].
#[derive(Debug, Clone, Copy, Default)]
pub struct Toc;
12
/// Marker used when the configuration does not provide one.
const DEFAULT_MARKER: &str = "<!-- toc -->\n";
/// Deepest heading level listed when the configuration does not provide one.
const DEFAULT_MAX_LEVEL: u32 = 4;
15
/// Configuration for Table of Contents generation
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Config {
    /// Marker to use, defaults to `<!-- toc -->\n`
    pub marker: String,
    /// The maximum level of headers to include in the table of contents.
    /// Defaults to `4`.
    pub max_level: u32,
}
24
25impl Default for Config {
26    fn default() -> Config {
27        Config {
28            marker: DEFAULT_MARKER.into(),
29            max_level: DEFAULT_MAX_LEVEL,
30        }
31    }
32}
33
34impl Preprocessor for Toc {
35    fn name(&self) -> &str {
36        "toc"
37    }
38
39    fn run(&self, ctx: &PreprocessorContext, mut book: Book) -> Result<Book> {
40        let mut res = None;
41        let cfg_key = |key| format!("preprocessor.{}.{}", self.name(), key);
42        let cfg = Config {
43            marker: ctx
44                .config
45                .get(&cfg_key("marker"))?
46                .unwrap_or_else(|| DEFAULT_MARKER.into()),
47            max_level: ctx
48                .config
49                .get(&cfg_key("max_level"))?
50                .unwrap_or(DEFAULT_MAX_LEVEL),
51        };
52
53        book.for_each_mut(|item: &mut BookItem| {
54            if let Some(Err(_)) = res {
55                return;
56            }
57
58            if let BookItem::Chapter(ref mut chapter) = *item {
59                res = Some(Toc::add_toc(chapter, &cfg).map(|md| {
60                    chapter.content = md;
61                }));
62            }
63        });
64
65        res.unwrap_or(Ok(())).map(|_| book)
66    }
67}
68
69fn build_toc(toc: &[(u32, String, String)]) -> String {
70    log::trace!("ToC from {toc:?}");
71    let mut result = String::new();
72
73    // "Normalize" header levels.
74    // If headers skip a level, we need to normalize them to avoid the skip.
75    // Otherwise the markdown render will escape nested levels.
76    //
77    // This is a rough approximation only.
78    let mut toc_iter = toc.iter().peekable();
79
80    // Start from the level of the first header.
81    let min_level = toc.iter().map(|(lvl, _, _)| *lvl).min().unwrap_or(1);
82    let mut last_lower = match toc_iter.peek() {
83        Some((lvl, _, _)) => *lvl,
84        None => 0,
85    };
86    let toc = toc.iter().map(|(lvl, name, slug)| {
87        let lvl = *lvl;
88        let lvl = match (last_lower + 1).cmp(&lvl) {
89            Ordering::Less => last_lower + 1,
90            _ => {
91                last_lower = lvl;
92                lvl
93            }
94        };
95        (lvl, name, slug)
96    });
97
98    for (level, name, slug) in toc {
99        let width = 2 * (level - min_level) as usize;
100        writeln!(result, "{:width$}* [{name}](#{slug})", "").unwrap();
101    }
102
103    result
104}
105
/// Turn heading text into a fragment identifier.
///
/// The input is trimmed and lowercased. Alphanumeric characters, `_` and `-`
/// are kept, every remaining whitespace character becomes `-`, and anything
/// else is dropped, so the result never contains whitespace.
fn normalize_id(content: &str) -> String {
    let lowered = content.trim().to_lowercase();
    let mut id = String::with_capacity(lowered.len());

    for ch in lowered.chars() {
        match ch {
            '_' | '-' => id.push(ch),
            c if c.is_alphanumeric() => id.push(c),
            c if c.is_whitespace() => id.push('-'),
            _ => {}
        }
    }

    id
}
124
125fn add_toc(content: &str, cfg: &Config) -> Result<String> {
126    let mut toc_found = false;
127
128    let mut toc_content = vec![];
129    // None = Not yet parsing the header
130    // Some(_) = Parsing the header
131    // Some(Some(start), Some(end)) = start and end indices of the text part
132    let mut current_header: Option<(Option<usize>, Option<usize>)> = None;
133    let mut current_header_level: Option<(u32, Option<String>)> = None;
134    let mut id_counter = HashMap::new();
135
136    let opts = Options::ENABLE_TABLES
137        | Options::ENABLE_FOOTNOTES
138        | Options::ENABLE_STRIKETHROUGH
139        | Options::ENABLE_TASKLISTS
140        | Options::ENABLE_HEADING_ATTRIBUTES;
141
142    let mark: Vec<Event> = Parser::new(&cfg.marker).collect();
143    log::trace!("Marker: {mark:?}");
144    let mut mark_start = None;
145    let mut mark_end = 0..0;
146    let mut mark_loc = 0;
147
148    let content = content.replace("\r\n", "\n");
149    for (e, span) in Parser::new_ext(&content, opts).into_offset_iter() {
150        log::trace!(
151            "Event: {e:?} (span: {span:?}, content: {:?})",
152            &content[span.start..span.end]
153        );
154        if !toc_found {
155            log::trace!("TOC not found yet. Location: {mark_loc}, Start: {mark_start:?}");
156            if e == mark[mark_loc] {
157                if mark_start.is_none() {
158                    mark_start = Some(span.clone());
159                }
160                mark_loc += 1;
161                if mark_loc >= mark.len() {
162                    mark_end = span.clone();
163                    toc_found = true
164                }
165            } else if mark_loc > 0 {
166                mark_loc = 0;
167                mark_start = None;
168            } else {
169                continue;
170            }
171        }
172        log::trace!("TOC found. Location: {mark_loc}, Start: {mark_start:?}");
173
174        if let Event::Start(Heading { level, id, .. }) = e {
175            log::trace!("Header(lvl={level}, fragment={id:?})");
176            let id = id.map(|s| s.to_string());
177            current_header_level = Some((level as u32, id));
178
179            // We're within the header. We parse out text/code fragments further down.
180            current_header = Some((None, None));
181            continue;
182        }
183
184        // Headers might consist of text and code. pulldown_cmark unescapes `\\`, so we try to find
185        // the correct span and extract the text ourselves later.
186        // We enabled `HEADING_ATTRIBUTES` so attributes within `{ }` won't be in the emitted event
187        if let Some(ref mut hdr) = current_header
188            && let Event::Text(_) | Event::Code(_) = &e
189        {
190            hdr.0 = Some(hdr.0.map_or(span.start, |start| start.min(span.start)));
191            hdr.1 = Some(hdr.1.map_or(span.end, |end| end.max(span.end)));
192        }
193
194        if let Event::End(TagEnd::Heading(header_lvl)) = e {
195            // Skip if this header is nested too deeply.
196            if let Some((level, id)) = current_header_level.take() {
197                assert!(header_lvl as u32 == level);
198                let header_span = current_header.take().unwrap();
199                // Skip headers with no extractable text content
200                let Some(start) = header_span.0 else {
201                    continue;
202                };
203
204                let Some(end) = header_span.1 else {
205                    continue;
206                };
207
208                if start >= end {
209                    continue;
210                }
211
212                let header = content[start..end].trim_end();
213                let slug = if let Some(slug) = id {
214                    // If a fragment is defined, take it as is, not trying to append an extra ID
215                    // in case of duplicates (same behavior as mdBook)
216                    slug.to_owned()
217                } else {
218                    let mut slug = normalize_id(header);
219                    let id_count = id_counter.entry(slug.clone()).or_insert(0);
220
221                    // Append unique ID if multiple headers with the same name exist
222                    // to follow what mdBook does
223                    if *id_count > 0 {
224                        write!(slug, "-{id_count}").unwrap();
225                    }
226
227                    *id_count += 1;
228                    slug
229                };
230
231                if level <= cfg.max_level {
232                    toc_content.push((level, header.to_string(), slug));
233                }
234            }
235            continue;
236        }
237        if current_header_level.is_none() {
238            continue;
239        }
240    }
241
242    let toc = build_toc(&toc_content);
243    log::trace!("Built TOC: {toc:?}");
244    log::trace!("toc_found={toc_found} mark_start={mark_start:?} mark_end={mark_end:?}");
245
246    let content = if toc_found {
247        let mark_start = mark_start.unwrap();
248        let content_before_toc = &content[0..mark_start.start];
249        let content_after_toc = &content[mark_end.end..];
250        log::trace!("content_before_toc={content_before_toc:?}");
251        log::trace!("content_after_toc={content_after_toc:?}");
252        // Multiline markers might have consumed trailing newlines,
253        // we ensure there's always one before the content.
254        let extra = if content_after_toc.is_empty() || content_after_toc.as_bytes()[0] == b'\n' {
255            ""
256        } else {
257            "\n"
258        };
259        format!("{content_before_toc}{toc}{extra}{content_after_toc}")
260    } else {
261        content.to_string()
262    };
263
264    Ok(content)
265}
266
267impl Toc {
268    /// Add a table of contents to the given chapter.
269    pub fn add_toc(chapter: &Chapter, cfg: &Config) -> Result<String> {
270        add_toc(&chapter.content, cfg)
271    }
272}