mdbook_tagger/
lib.rs

1use mdbook::book::{Book, BookItem};
2use mdbook::errors::{Error, Result};
3use mdbook::preprocess::{Preprocessor, PreprocessorContext};
4
5use std::path::PathBuf;
6use regex::Regex;
7use hashbrown::HashMap;
8
9use std::fs::File;
10use std::io::prelude::*;
11
12
/// Settings that control how tag comments are located in a chapter and where
/// the collected tag data is written.
///
/// Values start from the `DEFAULT_*` constants and may be overridden by string
/// entries in the preprocessor's configuration table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Config {
  /// Marker that introduces a tag comment, e.g. the `tags` in
  /// `... <!-- tags: hype; boy; --> ...`.
  ///
  /// The value is interpolated as-is into the regular expression
  /// `<!-- ?{marker}:?(...)-->`, so regex metacharacters in it are significant.
  marker: String,
  /// Where the tag -> posts JSON file is written (joined onto the book's
  /// source directory).
  tag2posts_path: PathBuf,
  /// Where the post -> tags JSON file is written (joined onto the book's
  /// source directory).
  post2tags_path: PathBuf,
  /// Separator used to split the captured comment body into individual tags.
  /// It is matched as a plain substring, not as a regular expression.
  split: String,
}
23
/// Marker looked for inside HTML comments when none is configured.
const DEFAULT_MARKER: &str = "tags";
/// File name (relative to the book source directory) of the tag -> posts data.
const DEFAULT_TAG2POSTS_PATH: &str = "tag2posts.json";
/// File name (relative to the book source directory) of the post -> tags data.
const DEFAULT_POST2TAGS_PATH: &str = "post2tags.json";
/// Separator between tags inside a tag comment when none is configured.
const DEFAULT_SPLIT: &str = ";";
28
29impl Config {
30  fn new(preprocessor_name: &str, ctx: &PreprocessorContext) -> Result<Self> {
31
32    let marker = String::from(DEFAULT_MARKER);
33    let tag2posts_path = ctx.config.book.src.join(DEFAULT_TAG2POSTS_PATH);
34    let post2tags_path = ctx.config.book.src.join(DEFAULT_POST2TAGS_PATH);
35
36    let mut config = Self {
37      marker, 
38      tag2posts_path,
39      post2tags_path,
40      split: String::from(DEFAULT_SPLIT)
41    };
42
43    let Some(cfg) = ctx.config.get_preprocessor(preprocessor_name) else {
44      return Ok(config)
45    };
46
47    let get_value_to_str = |cfg: &toml::map::Map<String, toml::value::Value>, key: &str| {
48      if let Some(x) = cfg.get(key) {
49        let res = if let Some(x) = x.as_str() {
50          Ok(x.to_string())
51        } else {
52          Err(Error::msg(format!("{key} {x:?} is not a valid string")))
53        };
54        Some(res)
55      } else {
56        None
57      }
58    };
59
60    if let Some(x) = get_value_to_str(cfg, "marker") {
61      config.marker = x?;
62    }
63
64    if let Some(x) = get_value_to_str(cfg, "tag2posts_path") {
65      config.tag2posts_path = ctx.config.book.src.join(x?.as_str());
66    }
67
68    if let Some(x) = get_value_to_str(cfg, "post2tags_path") {
69      config.post2tags_path = ctx.config.book.src.join(x?.as_str());
70    }
71
72    if let Some(x) = get_value_to_str(cfg, "split") {
73      config.split = x?;
74    }
75
76    // check out the regex syntax in advance.
77    let _ = config.regex()?;
78
79    Ok(config)
80  }
81
82  fn regex(&self) -> Result<Regex> {
83    let marker = &self.marker;
84    let re = format!("<!-- ?{}:?((?s).*?)-->", marker);
85    if let Ok(re) = Regex::new(re.as_str()) {
86      Ok(re)
87    } else {
88      Err(Error::msg(format!("marker {:?} has failed to be parsed into regular expression", marker)))
89    }
90  }
91
92  /// Collect matched data and drain the content.
93  fn collect_and_drain(
94    &self,
95    content: &mut String,
96    name: &str,
97    path: String,
98    tag2posts: &mut HashMap<String, Vec<(String, String)>>,
99    post2tags: &mut HashMap<String, Vec<String>>
100  ) -> () {
101
102    let parse_to_tags = |str: &str| {
103      str.trim().split(&self.split).into_iter().filter_map(|x| {
104        let x = x.trim();
105        if x.len()>0 {
106          Some(x.to_string())
107        } else {
108          None
109        }
110      }).collect::<Vec<_>>()
111    };
112
113    let (mut start, mut end) = (None, None);
114
115    if let Some(cap) = self.regex().unwrap().captures(content.as_str()) {
116      if let Some(match1) = cap.get(1) {
117        
118        let match0 = cap.get(0).unwrap();
119        start.replace(match0.start());
120        end.replace(match0.end());
121
122        let tags = parse_to_tags(match1.as_str());
123        post2tags.insert(path.clone(), tags.clone());
124
125        for tag in tags.into_iter() {
126          let post_ = (name.to_string(), path.clone());
127          tag2posts.entry(tag)
128            .and_modify(|list| { list.push(post_.clone()); })
129            .or_insert(vec![post_]);
130        }
131      }
132    }
133
134    // drain
135    if let Some(start) = start {
136      if let Some(end) = end {
137        let _ = content.drain(start..end);
138      }
139    }
140  }
141}
142
143
144
/// The `tagger` preprocessor: a stateless unit type whose behavior lives in
/// its `Preprocessor` implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct Tagger;

impl Tagger {
  /// Creates the preprocessor. Equivalent to `Tagger::default()`.
  pub fn new() -> Self {
    Self
  }
}
150
151impl Preprocessor for Tagger {
152  
153  fn name(&self) -> &str {
154    "tagger"
155  }
156
157  fn run(&self, ctx: &PreprocessorContext, mut book: Book) -> Result<Book> {
158      
159    log::trace!("Preprocessor Collector is working");
160    
161    let cfg = Config::new(self.name(), ctx)?;
162
163    let mut tag2posts = HashMap::new();
164    let mut post2tags = HashMap::new();
165
166    book.for_each_mut(|book: &mut BookItem| {
167      if let BookItem::Chapter(chapter) = book {
168
169        if let Some(path) = chapter.path.as_ref().map(|x| x.as_os_str().to_str()).flatten() {
170          let name = chapter.name.to_string();
171          
172          cfg.collect_and_drain(&mut chapter.content, &name, path.to_string(), &mut tag2posts, &mut post2tags);
173        }
174      }
175    });
176
177    if tag2posts.len()>0 {
178      let data = serde_json::to_string(&tag2posts).unwrap();
179      let mut f = File::create(cfg.tag2posts_path.as_path()).unwrap();
180      f.write_all(data.as_bytes())?;
181    }
182    if post2tags.len()>0 {
183      let data = serde_json::to_string(&post2tags).unwrap();
184      let mut f = File::create(cfg.post2tags_path.as_path()).unwrap();
185      f.write_all(data.as_bytes())?;
186    }
187
188    Ok(book)
189  }
190}