mdbook_toc/
lib.rs

1use std::cmp::Ordering;
2use std::collections::HashMap;
3use std::convert::{TryFrom, TryInto};
4use std::fmt::Write;
5
6use mdbook::book::{Book, BookItem, Chapter};
7use mdbook::errors::{Error, Result};
8use mdbook::preprocess::{Preprocessor, PreprocessorContext};
9use pulldown_cmark::Tag::*;
10use pulldown_cmark::{Event, Options, Parser};
11use toml::value::Table;
12
/// The table-of-contents preprocessor.
///
/// Implements `Preprocessor` under the name `"toc"` and exposes
/// `Toc::add_toc` for processing a single chapter.
pub struct Toc;

/// Marker used by `Config::default()`: an HTML comment followed by a newline.
static DEFAULT_MARKER: &str = "<!-- toc -->\n";
16
/// Configuration for Table of Contents generation.
///
/// Build it with [`Default::default`] or convert it from the preprocessor's
/// configuration table via `TryFrom<Option<&Table>>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Config {
    /// Marker to use, defaults to `<!-- toc -->\n`
    pub marker: String,
    /// The maximum level of headers to include in the table of contents.
    /// Defaults to `4`.
    pub max_level: u32,
}
25
26impl Default for Config {
27    fn default() -> Config {
28        Config {
29            marker: DEFAULT_MARKER.into(),
30            max_level: 4,
31        }
32    }
33}
34
35impl<'a> TryFrom<Option<&'a Table>> for Config {
36    type Error = Error;
37
38    fn try_from(mdbook_cfg: Option<&Table>) -> Result<Config> {
39        let mut cfg = Config::default();
40        let mdbook_cfg = match mdbook_cfg {
41            Some(c) => c,
42            None => return Ok(cfg),
43        };
44
45        if let Some(marker) = mdbook_cfg.get("marker") {
46            let marker = match marker.as_str() {
47                Some(m) => m,
48                None => {
49                    return Err(Error::msg(format!(
50                        "Marker {marker:?} is not a valid string",
51                    )))
52                }
53            };
54            cfg.marker = marker.into();
55        }
56
57        if let Some(level) = mdbook_cfg.get("max-level") {
58            let level = match level.as_integer() {
59                Some(l) => l,
60                None => {
61                    return Err(Error::msg(format!(
62                        "Level {level:?} is not a valid integer",
63                    )))
64                }
65            };
66            cfg.max_level = level.try_into()?;
67        }
68
69        Ok(cfg)
70    }
71}
72
73impl Preprocessor for Toc {
74    fn name(&self) -> &str {
75        "toc"
76    }
77
78    fn run(&self, ctx: &PreprocessorContext, mut book: Book) -> Result<Book> {
79        let mut res = None;
80        let cfg = ctx.config.get_preprocessor(self.name()).try_into()?;
81
82        book.for_each_mut(|item: &mut BookItem| {
83            if let Some(Err(_)) = res {
84                return;
85            }
86
87            if let BookItem::Chapter(ref mut chapter) = *item {
88                res = Some(Toc::add_toc(chapter, &cfg).map(|md| {
89                    chapter.content = md;
90                }));
91            }
92        });
93
94        res.unwrap_or(Ok(())).map(|_| book)
95    }
96}
97
98fn build_toc(toc: &[(u32, String, String)]) -> String {
99    log::trace!("ToC from {toc:?}");
100    let mut result = String::new();
101
102    // "Normalize" header levels.
103    // If headers skip a level, we need to normalize them to avoid the skip.
104    // Otherwise the markdown render will escape nested levels.
105    //
106    // This is a rough approximation only.
107    let mut toc_iter = toc.iter().peekable();
108
109    // Start from the level of the first header.
110    let min_level = toc.iter().map(|(lvl, _, _)| *lvl).min().unwrap_or(1);
111    let mut last_lower = match toc_iter.peek() {
112        Some((lvl, _, _)) => *lvl,
113        None => 0,
114    };
115    let toc = toc.iter().map(|(lvl, name, slug)| {
116        let lvl = *lvl;
117        let lvl = match (last_lower + 1).cmp(&lvl) {
118            Ordering::Less => last_lower + 1,
119            _ => {
120                last_lower = lvl;
121                lvl
122            }
123        };
124        (lvl, name, slug)
125    });
126
127    for (level, name, slug) in toc {
128        let width = 2 * (level - min_level) as usize;
129        writeln!(result, "{:width$}* [{name}](#{slug})", "").unwrap();
130    }
131
132    result
133}
134
135fn add_toc(content: &str, cfg: &Config) -> Result<String> {
136    let mut toc_found = false;
137
138    let mut toc_content = vec![];
139    let mut current_header = String::new();
140    let mut current_header_level: Option<u32> = None;
141    let mut id_counter = HashMap::new();
142
143    let opts = Options::ENABLE_TABLES
144        | Options::ENABLE_FOOTNOTES
145        | Options::ENABLE_STRIKETHROUGH
146        | Options::ENABLE_TASKLISTS
147        | Options::ENABLE_HEADING_ATTRIBUTES;
148
149    let mark: Vec<Event> = Parser::new(&cfg.marker).collect();
150    log::trace!("Marker: {mark:?}");
151    let mut mark_start = None;
152    let mut mark_end = 0..0;
153    let mut mark_loc = 0;
154
155    let content = content.replace("\r\n", "\n");
156    for (e, span) in Parser::new_ext(&content, opts).into_offset_iter() {
157        log::trace!("Event: {e:?} (span: {span:?})");
158        if !toc_found {
159            log::trace!("TOC not found yet. Location: {mark_loc}, Start: {mark_start:?}");
160            if e == mark[mark_loc] {
161                if mark_start.is_none() {
162                    mark_start = Some(span.clone());
163                }
164                mark_loc += 1;
165                if mark_loc >= mark.len() {
166                    mark_end = span;
167                    toc_found = true
168                }
169            } else if mark_loc > 0 {
170                mark_loc = 0;
171                mark_start = None;
172            } else {
173                continue;
174            }
175        }
176
177        if let Event::Start(Heading(lvl, fragment, classes)) = e {
178            log::trace!("Header(lvl={lvl}, fragment={fragment:?}, classes={classes:?})");
179            current_header_level = Some(lvl as u32);
180            continue;
181        }
182        if let Event::End(Heading(_, fragment, _)) = e {
183            // Skip if this header is nested too deeply.
184            if let Some(level) = current_header_level.take() {
185                let header = current_header.clone();
186                let slug = if let Some(slug) = fragment {
187                    // If a fragment is defined, take it as is, not trying to append an extra ID
188                    // in case of duplicates (same behavior as mdBook)
189                    slug.to_owned()
190                } else {
191                    let mut slug = mdbook::utils::normalize_id(&header);
192                    let id_count = id_counter.entry(slug.clone()).or_insert(0);
193
194                    // Append unique ID if multiple headers with the same name exist
195                    // to follow what mdBook does
196                    if *id_count > 0 {
197                        write!(slug, "-{id_count}").unwrap();
198                    }
199
200                    *id_count += 1;
201                    slug
202                };
203
204                if level <= cfg.max_level {
205                    toc_content.push((level, header, slug));
206                }
207
208                current_header.clear();
209            }
210            continue;
211        }
212        if current_header_level.is_none() {
213            continue;
214        }
215
216        match e {
217            Event::Text(header) => write!(current_header, "{header}").unwrap(),
218            Event::Code(code) => write!(current_header, "`{code}`").unwrap(),
219            _ => {} // Rest is unhandled
220        }
221    }
222
223    let toc = build_toc(&toc_content);
224    log::trace!("Built TOC: {toc:?}");
225    log::trace!("toc_found={toc_found} mark_start={mark_start:?} mark_end={mark_end:?}");
226
227    let content = if toc_found {
228        let mark_start = mark_start.unwrap();
229        let content_before_toc = &content[0..mark_start.start];
230        let content_after_toc = &content[mark_end.end..];
231        log::trace!("content_before_toc={content_before_toc:?}");
232        log::trace!("content_after_toc={content_after_toc:?}");
233        // Multiline markers might have consumed trailing newlines,
234        // we ensure there's always one before the content.
235        let extra = if content_after_toc.is_empty() || content_after_toc.as_bytes()[0] == b'\n' {
236            ""
237        } else {
238            "\n"
239        };
240        format!("{content_before_toc}{toc}{extra}{content_after_toc}")
241    } else {
242        content.to_string()
243    };
244
245    Ok(content)
246}
247
248impl Toc {
249    /// Add a table of contents to the given chapter.
250    pub fn add_toc(chapter: &Chapter, cfg: &Config) -> Result<String> {
251        add_toc(&chapter.content, cfg)
252    }
253}