rustme/
rustme.rs

use std::{
    collections::{BTreeMap, HashMap},
    fs::File,
    io::{ErrorKind, Write},
    path::{Path, PathBuf},
    str::Utf8Error,
    string::FromUtf8Error,
};

use serde::{Deserialize, Serialize};

/// A configuration of how to generate one or more READMEs.
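///
/// A configuration is typically written as a RON file. A minimal sketch
/// (file names and section paths are illustrative, not required by the
/// format):
///
/// ```ron
/// Configuration(
///     files: {
///         "README.md": ["docs/header.md", "https://example.com/footer.md"],
///     },
///     glossaries: [
///         Inline({
///             "CRATE_NAME": "rustme",
///         }),
///     ],
/// )
/// ```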
#[derive(Deserialize, Serialize, Debug)]
pub struct Configuration {
    /// The location that paths should be resolved relative to.
    #[serde(skip)]
    pub relative_to: PathBuf,
    /// The collection of files (keys) and their sections (values).
    pub files: HashMap<String, Vec<String>>,
    /// A list of glossaries that act as a source of snippets.
    #[serde(default)]
    pub glossaries: Vec<Glossary>,
}

impl Configuration {
    /// Attempts to load a configuration from `path`.
    ///
    /// # Errors
    ///
    /// - [`Error::Io`]: Returned if an error occurs interacting with the
    ///   filesystem.
    /// - [`Error::Ron`]: Returned if an error occurs while parsing the
    ///   configuration file.
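    ///
    /// # Example
    ///
    /// A minimal sketch of loading and generating, assuming this file builds
    /// as the `rustme` library crate (the configuration path is
    /// hypothetical):
    ///
    /// ```no_run
    /// # fn main() -> Result<(), rustme::Error> {
    /// let configuration = rustme::Configuration::load("rustme/config.ron")?;
    /// configuration.generate()?;
    /// # Ok(())
    /// # }
    /// ```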
    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
        let contents = std::fs::read_to_string(path.as_ref())?;
        let mut configuration = ron::from_str::<Self>(&contents)?;
        configuration.relative_to = path
            .as_ref()
            .parent()
            .ok_or_else(|| Error::Io(std::io::Error::from(ErrorKind::NotFound)))?
            .to_path_buf();
        Ok(configuration)
    }

    /// Generates the README files.
    ///
    /// # Errors
    ///
    /// Returns an error if a section file cannot be read, fetched over HTTP,
    /// or processed.
    pub fn generate(&self) -> Result<(), Error> {
        let mut snippets = HashMap::new();
        let glossary = self.load_glossaries()?;
        for (name, sections) in &self.files {
            let output_path = self.relative_to.join(name);
            if output_path.exists() {
                std::fs::remove_file(&output_path)?;
            }

            let mut output = File::create(&output_path)?;
            for (index, section) in sections.iter().enumerate() {
                if index > 0 {
                    output.write_all(b"\n")?;
                }
                let markdown = if section.starts_with("http://") || section.starts_with("https://")
                {
                    ureq::get(section)
                        .set("User-Agent", "RustMe")
                        .call()?
                        .into_string()?
                } else {
                    std::fs::read_to_string(self.relative_to.join(section))?
                };
                let processed =
                    process_markdown(&markdown, &self.relative_to, &mut snippets, &glossary)?;
                output.write_all(processed.as_bytes())?;
            }
        }

        Ok(())
    }

    fn load_glossaries(&self) -> Result<HashMap<String, String>, Error> {
        let mut combined = HashMap::new();

        for glossary in &self.glossaries {
            match glossary {
                Glossary::External(url) => {
                    let glossary_text = ureq::get(url)
                        .set("User-Agent", "RustMe")
                        .call()?
                        .into_string()?;
                    let glossary = ron::from_str::<BTreeMap<String, String>>(&glossary_text)?;
                    for (key, value) in glossary {
                        combined.insert(key, value);
                    }
                }
                Glossary::Inline(glossary) => {
                    for (key, value) in glossary {
                        combined.insert(key.to_string(), value.to_string());
                    }
                }
            }
        }

        Ok(combined)
    }
}

fn replace_references(
    markdown: &str,
    base_dir: &Path,
    snippets: &mut HashMap<String, String>,
    glossary: &HashMap<String, String>,
) -> Result<String, Error> {
    let mut processed = Vec::with_capacity(markdown.len());
    let mut chars = StrByteIterator::new(markdown);
    loop {
        let skipped = chars.read_until_char(b'$')?;
        if !skipped.is_empty() {
            processed.extend(skipped.bytes());
        }
        // Skip the $, or exit if one wasn't found.
        if chars.next().is_none() {
            break;
        }

        let snippet_ref = chars.read_until_char(b'$')?;
        // Skip the trailing $, which must exist to close the reference.
        if chars.next().is_none() {
            return Err(Error::MalformedSnippetReference);
        }
        if snippet_ref.is_empty() {
            // An escaped dollar sign ($$).
            processed.push(b'$');
        } else if let Some(value) = glossary.get(snippet_ref) {
            processed.extend(value.bytes());
        } else {
            let snippet = load_snippet(snippet_ref, base_dir, snippets)?;
            processed.extend(snippet.bytes());
        }
    }
    Ok(String::from_utf8(processed)?)
}
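
// An illustrative check of `$name$` replacement: glossary keys are expanded
// inline and `$$` escapes to a literal dollar sign.
#[test]
fn test_replace_references_glossary() {
    let mut snippets = HashMap::new();
    let mut glossary = HashMap::new();
    glossary.insert("NAME".to_string(), "RustMe".to_string());
    let replaced =
        replace_references("$$5 for $NAME$", Path::new("."), &mut snippets, &glossary).unwrap();
    assert_eq!(replaced, "$5 for RustMe");
}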

fn preprocess_rust_codeblocks(markdown: &str) -> Result<String, Error> {
    let mut processed = Vec::with_capacity(markdown.len());
    let mut chars = StrByteIterator::new(markdown);
    while let Some(ch) = chars.next() {
        match ch {
            b'`' => {
                if chars.try_read("``rust") {
                    // Preprocess rust blocks in the same way that rustdoc does.
                    processed.extend(b"```rust");
                    let rest_of_line = chars.read_line()?;
                    processed.extend(rest_of_line.bytes());

                    loop {
                        let line = chars.read_line()?;
                        if line.is_empty() {
                            // End of input was reached without a closing fence.
                            return Err(Error::MalformedCodeBlock);
                        }
                        let trimmed_start = line.trim_start();
                        if trimmed_start.starts_with("```") {
                            // Ends the code block
                            processed.extend(line.bytes());
                            break;
                        } else if trimmed_start.starts_with("# ") {
                            // A hidden doc-example line; omit it from the output.
                        } else {
                            processed.extend(line.bytes());
                        }
                    }
                } else {
                    processed.push(ch);
                }
            }
            ch => {
                processed.push(ch);
            }
        }
    }
    Ok(String::from_utf8(processed)?)
}
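
// An illustrative check that hidden `# ` lines are stripped from ```rust
// blocks, mirroring how rustdoc renders doc examples.
#[test]
fn test_preprocess_hidden_lines() {
    let markdown = "```rust\n# use std::fmt::Write;\nprintln!(\"hi\");\n```\n";
    let processed = preprocess_rust_codeblocks(markdown).unwrap();
    assert_eq!(processed, "```rust\nprintln!(\"hi\");\n```\n");
}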

fn process_markdown(
    markdown: &str,
    base_dir: &Path,
    snippets: &mut HashMap<String, String>,
    glossary: &HashMap<String, String>,
) -> Result<String, Error> {
    let expanded = replace_references(markdown, base_dir, snippets, glossary)?;
    preprocess_rust_codeblocks(&expanded)
}

fn load_snippet<'a>(
    snippet_ref: &str,
    base_dir: &Path,
    snippets: &'a mut HashMap<String, String>,
) -> Result<&'a String, Error> {
    if !snippets.contains_key(snippet_ref) {
        let path = snippet_ref.split(':').next().unwrap();
        load_snippets(path, &base_dir.join(path), snippets)?;
    }

    if let Some(snippet) = snippets.get(snippet_ref) {
        Ok(snippet)
    } else {
        Err(Error::SnippetNotFound(snippet_ref.to_string()))
    }
}

fn remove_shared_prefix(strings: &mut [&str]) {
    if strings.is_empty() || strings[0].is_empty() {
        return;
    }

    loop {
        // Stop once the first line runs out of bytes or its next byte is no
        // longer a whitespace character shared by every other line.
        let first_byte = match strings[0].as_bytes().first() {
            Some(byte) if byte.is_ascii_whitespace() => *byte,
            _ => break,
        };
        if strings[1..]
            .iter()
            .all(|string| string.as_bytes().first() == Some(&first_byte))
        {
            for string in strings.iter_mut() {
                *string = &string[1..];
            }
        } else {
            break;
        }
    }
}
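
// An illustrative check that the shared leading indentation of a snippet is
// stripped while relative indentation between its lines is preserved.
#[test]
fn test_remove_shared_prefix() {
    let mut lines = vec!["    fn demo() {", "        body();", "    }"];
    remove_shared_prefix(&mut lines);
    assert_eq!(lines, vec!["fn demo() {", "    body();", "}"]);
}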

fn load_snippets(
    ref_path: &str,
    disk_path: &Path,
    snippets: &mut HashMap<String, String>,
) -> Result<(), Error> {
    const SNIPPET_START: &str = "begin rustme snippet:";
    const SNIPPET_END: &str = "end rustme snippet";
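    // The markers are matched anywhere within a line, so they can live inside
    // comments in any language. An illustrative snippet definition:
    //
    //     // begin rustme snippet: example
    //     fn demo() {}
    //     // end rustme snippet
    //
    // which would be referenced from markdown as `$<path to this file>:example$`.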
    let contents = std::fs::read_to_string(disk_path)?;
    let mut current_snippet = Vec::new();
    let mut current_snippet_name = None;
    for line in contents.lines() {
        if let Some(phrase_start) = line.find(SNIPPET_START) {
            current_snippet_name = Some(
                line[phrase_start + SNIPPET_START.len()..]
                    .trim()
                    .split(' ')
                    .next()
                    .unwrap(),
            );
            current_snippet = Vec::default();
        } else if line.contains(SNIPPET_END) {
            if let Some(name) = current_snippet_name.take() {
                remove_shared_prefix(&mut current_snippet);
                let contents = current_snippet.join("\n");
                if snippets
                    .insert(format!("{}:{}", ref_path, name), contents)
                    .is_some()
                {
                    return Err(Error::SnippetAlreadyDefined(name.to_string()));
                }
            } else {
                return Err(Error::MalformedSnippet);
            }
        } else if current_snippet_name.is_some() {
            current_snippet.push(line);
        }
    }

    // A snippet that was begun but never closed is an error.
    if current_snippet_name.is_some() {
        return Err(Error::SnippetEndNotFound);
    }

    Ok(())
}

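/// A minimal byte-level cursor over a `&str`, used by the parsers above to
/// scan for ASCII delimiters without copying the input.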
struct StrByteIterator<'a> {
    remaining: &'a [u8],
}

impl<'a> StrByteIterator<'a> {
    pub const fn new(value: &'a str) -> Self {
        Self {
            remaining: value.as_bytes(),
        }
    }

    pub fn try_read(&mut self, compare_against: &str) -> bool {
        if self.remaining.starts_with(compare_against.as_bytes()) {
            let (_, tail) = self.remaining.split_at(compare_against.len());
            self.remaining = tail;
            true
        } else {
            false
        }
    }

    pub fn read_until(
        &mut self,
        mut cb: impl FnMut(u8) -> bool,
        include_last_byte: bool,
    ) -> Result<&'a str, Error> {
        for (index, byte) in self.remaining.iter().copied().enumerate() {
            // Do not offer non-ASCII bytes to the callback; matching one could
            // split the string inside a multi-byte Unicode code point.
            if byte < 128 && cb(byte) {
                let (read, tail) = if include_last_byte {
                    self.remaining.split_at(index + 1)
                } else {
                    self.remaining.split_at(index)
                };
                self.remaining = tail;
                return Ok(std::str::from_utf8(read)?);
            }
        }

        let result = self.remaining;
        self.remaining = b"";
        Ok(std::str::from_utf8(result)?)
    }

    pub fn read_until_char(&mut self, ch: u8) -> Result<&'a str, Error> {
        self.read_until(|byte| byte == ch, false)
    }

    pub fn read_line(&mut self) -> Result<&'a str, Error> {
        self.read_until(|ch| ch == b'\n', true)
    }
}
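
// Illustrative checks of the byte cursor helpers used by the parsers above.
#[test]
fn test_str_byte_iterator() {
    let mut chars = StrByteIterator::new("abc$def\nrest");
    assert_eq!(chars.read_until_char(b'$').unwrap(), "abc");
    assert_eq!(chars.next(), Some(b'$'));
    assert_eq!(chars.read_line().unwrap(), "def\n");
    assert_eq!(chars.read_line().unwrap(), "rest");
}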

impl<'a> Iterator for StrByteIterator<'a> {
    type Item = u8;

    fn next(&mut self) -> Option<Self::Item> {
        if self.remaining.is_empty() {
            None
        } else {
            let (next, tail) = self.remaining.split_at(1);
            self.remaining = tail;
            next.first().copied()
        }
    }
}

/// A mapping of replacements that can be used within the files using `$name$`
/// syntax.
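///
/// For example, with a glossary entry mapping `"VERSION"` to `"1.0"` (an
/// illustrative entry), every `$VERSION$` in a section expands to `1.0`, and
/// an empty reference (`$$`) produces a literal `$`.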
#[derive(Deserialize, Serialize, Debug)]
pub enum Glossary {
    /// An external glossary. The contained value should be a valid URL to a
    /// RON-encoded `HashMap<String, String>`.
    External(String),
    /// An inline glossary.
    Inline(HashMap<String, String>),
}

#[test]
fn test_no_glossary() {
    let configuration: Configuration = ron::from_str(
        r#"
        Configuration(
            files: {
                "README.md": ["a", "b"],
                "OTHERREADME.md": ["a", "b"],
            }
        )
        "#,
    )
    .unwrap();
    println!("Parsed: {:?}", configuration);
}

#[test]
fn test_glossary() {
    let configuration: Configuration = ron::from_str(
        r#"
        Configuration(
            files: {
                "README.md": ["a", "b"],
                "OTHERREADME.md": ["a", "b"],
            },
            glossaries: [
                Inline({
                    "TEST": "SUCCESS",
                })
            ]
        )
        "#,
    )
    .unwrap();
    println!("Parsed: {:?}", configuration);
}

/// All errors that `rustme` can return.
#[derive(thiserror::Error, Debug)]
pub enum Error {
    /// A snippet reference is missing its closing `$`.
    #[error("a snippet reference is missing its closing $")]
    MalformedSnippetReference,
    /// Snippet begin and end markers are mismatched.
    #[error("snippet begin and end markers are mismatched")]
    MalformedSnippet,
    /// A Rust code block could not be parsed.
    #[error("a rust code block could not be parsed")]
    MalformedCodeBlock,
    /// A snippet was already defined.
    #[error("snippet already defined: {0}")]
    SnippetAlreadyDefined(String),
    /// A snippet was not found.
    #[error("snippet not found: {0}")]
    SnippetNotFound(String),
    /// A snippet was begun but not ended.
    #[error("snippet end not found")]
    SnippetEndNotFound,
    /// An I/O error occurred.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
    /// A [RON](https://github.com/ron-rs/ron) error.
    #[error("ron error: {0}")]
    Ron(#[from] ron::Error),
    /// An error requesting an HTTP resource.
    #[error("http error: {0}")]
    Http(#[from] ureq::Error),
    /// An invalid Unicode byte sequence was encountered.
    #[error("unicode error: {0}")]
    Unicode(String),
}

impl From<Utf8Error> for Error {
    fn from(err: Utf8Error) -> Self {
        Self::Unicode(err.to_string())
    }
}

impl From<FromUtf8Error> for Error {
    fn from(err: FromUtf8Error) -> Self {
        Self::Unicode(err.to_string())
    }
}