adbook/build/convert/
adoc.rs

1/*!
`asciidoctor` runner and metadata extractor
3*/
4
5use std::{
6    path::{Path, PathBuf},
7    process::Command,
8};
9
10use anyhow::{bail, ensure, Context, Result};
11use thiserror::Error;
12
13use crate::book::{config::CmdOptions, BookStructure};
14
15// --------------------------------------------------------------------------------
16// `asciidoctor` runner
17
18// TODO:
19// pub type Result<T> = std::result::Result<T, AdocError>;
20
/// Structure for error printing
///
/// Rendered through the `thiserror`-derived `Display` implementation, so the message layout is
/// defined by the `#[error(..)]` attribute on each variant.
///
/// TODO: refactor and prefer it to anyhow::Error
#[derive(Debug, Error, Clone)]
pub enum AdocError {
    /// Conversion of a source file failed
    ///
    /// Fields, in order: the path printed after "Failed to convert file:", and the text printed
    /// between the separator lines under "asciidoctor output".
    #[error("Failed to convert file: {0}\nasciidoctor output\n--------------------------------\n{1}\n--------------------------------")]
    FailedToConvert(PathBuf, String),
}
29
/// Context for running `asciidoctor`
///
/// # String interpolation
///
/// [`Self::replace_placeholder_strings`] does it.
///
/// # Notes on `asciidoctor` directory options
///
/// ## `asciidoctor -B`
///
/// It's used to supply a (virtual) base directory, especially when the input is stdin. The
/// directory path is used for the "safe mode". The source directory is passed with this option
/// when the options are applied to a command.
///
/// ## `asciidoctor -D`
///
/// Not used here. It's for specifying the output directory and is good for mirroring one
/// directory to another:
///
/// ```sh
/// $ asciidoctor -D out -R . '**/*.adoc'
/// ```
#[derive(Debug, Clone)]
pub struct AdocRunContext {
    /// Source directory
    ///
    /// Used as the working directory and `-B` directory of the command, and substituted for the
    /// `{src_dir}` placeholder.
    src_dir: String,
    /// `asciidoctor -a` (attributes) or other options
    ///
    /// Each entry is an option name paired with its arguments; an empty argument list means a
    /// flag without argument.
    opts: CmdOptions,
    /// Used to modify `asciidoctor` attributes supplied to `.adoc` files
    ///
    /// Substituted for the `{base_url}` placeholder.
    base_url: String,
}
59
60impl AdocRunContext {
61    pub fn from_book(book: &BookStructure) -> Result<Self> {
62        let src_dir = normalize(&book.src_dir_path())?;
63
64        Ok(Self {
65            src_dir,
66            opts: book.book_ron.adoc_opts.clone(),
67            base_url: book.book_ron.base_url.to_string(),
68        })
69    }
70
71    /// Embedded mode: output without header (including title) and footer
72    pub fn set_embedded_mode(&mut self, b: bool) {
73        if b {
74            self.opts.push(("--embedded".to_string(), vec![]));
75        } else {
76            self.opts = self
77                .opts
78                .clone()
79                .into_iter()
80                .filter(|(name, _values)| name == "--embedded")
81                .collect();
82        }
83    }
84
85    /// Applies `asciidoctor` options defined in `book.ron`
86    pub fn apply_options(&self, cmd: &mut Command) {
87        // setup directory settings
88        cmd.current_dir(&self.src_dir).args(&["-B", &self.src_dir]);
89
90        // setup user options
91        for (opt, args) in &self.opts {
92            // case 1. option without argument
93            if args.is_empty() {
94                cmd.arg(opt);
95                continue;
96            }
97
98            // case 2. (option with argument) specified n times
99            // like, -a linkcss -a sectnums ..
100            for arg in args {
101                let arg = self.replace_placeholder_strings(arg);
102                cmd.args(&[opt, &arg]);
103            }
104        }
105    }
106
107    pub fn replace_placeholder_strings(&self, arg: &str) -> String {
108        let arg = arg.replace(r#"{base_url}"#, &self.base_url);
109        let arg = arg.replace(r#"{src_dir}"#, &self.src_dir);
110
111        arg
112    }
113}
114
115/// UNC path is not recognized by `asciidoctor`, so this is the hot fix:
116fn normalize(path: &Path) -> Result<String> {
117    let s = format!("{}", path.canonicalize()?.display());
118    let s = s
119        .strip_prefix(r#"\\?\"#)
120        .map(|s| {
121            // `\\?\C:\` → `c:\`
122            s.replace(r#"\\?\"#, "/")
123        })
124        .unwrap_or(s);
125    Ok(s)
126}
127
128/// Sets up `asciidoctor` command
129pub fn asciidoctor(src_file: &Path, acx: &AdocRunContext) -> Result<Command> {
130    ensure!(
131        src_file.exists(),
132        "Given non-existing file as conversion source"
133    );
134
135    // NOTE: On windows `Command` did not find `asciidoctor`, so let's give absolute path to it.
136    let asciidoctor = which::which("asciidoctor").unwrap();
137    let mut cmd = Command::new(format!("{}", asciidoctor.display()));
138
139    // output to stdout
140    // NOTE: `fs::canonizalize` returns the carsed UNC path on Windows.
141    cmd.arg(&normalize(src_file)?).args(&["-o", "-"]);
142
143    // require `asciidoctor-diagram`
144    cmd.args(&["-r", "asciidoctor-diagram"]);
145
146    // prefer verbose output
147    cmd.arg("--trace").arg("--verbose");
148
149    // apply directory settings and user options (often ones defined in `book.ron`)
150    acx.apply_options(&mut cmd);
151
152    Ok(cmd)
153}
154
155/// Runs `asciidoctor` command and returns the output
156pub fn run_asciidoctor(src_file: &Path, acx: &AdocRunContext) -> Result<std::process::Output> {
157    let mut cmd =
158        self::asciidoctor(src_file, acx).context("when setting up `asciidoctor` options")?;
159
160    // trace!("{:?}", cmd);
161
162    let output = match cmd.output() {
163        Ok(output) => output,
164        Err(err) => {
165            bail!(
166                "when running `asciidoctor`:\n  src: {}\n  cmd: {:?}\n  stdout: {:?}",
167                normalize(src_file)?,
168                cmd,
169                err
170            )
171        }
172    };
173
174    Ok(output)
175}
176
177/// Runs `asciidoctor` command and writes the output to a string buffer
178pub fn run_asciidoctor_buf(buf: &mut String, src_file: &Path, acx: &AdocRunContext) -> Result<()> {
179    let output = self::run_asciidoctor(src_file, acx)?;
180
181    // ensure the conversion succeeded
182    ensure!(
183        output.status.success(),
184        // ..or else report it as an error
185        AdocError::FailedToConvert(
186            src_file.to_path_buf(),
187            String::from_utf8(output.stderr)
188                .unwrap_or("<non-UTF8 stderr by `asciidoctor`>".to_string())
189        )
190    );
191
192    // finally output to the buffer
193    let text = std::str::from_utf8(&output.stdout)
194        .with_context(|| "Unable to decode stdout of `asciidoctor` as UTF8")?;
195    buf.push_str(text);
196
197    // stderr
198    if !output.stderr.is_empty() {
199        eprintln!(
200            "Asciidoctor stderr while converting {}:",
201            src_file.display()
202        );
203        let err = String::from_utf8(output.stderr)
204            .unwrap_or("<non-UTF8 stderr by `asciidoctor`>".to_string());
205        eprintln!("{}", &err);
206    }
207
208    Ok(())
209}
210
211// --------------------------------------------------------------------------------
212// Metadata extraction
213
/// Attribute of an Asciidoctor document
///
/// Different from Asciidoctor, document attributes specified with command line arguments are always
/// overwritable by default.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AdocAttr {
    /// `:!<attribute>:` (holds the attribute name without the leading `!`)
    Deny(String),
    /// `:<attribute>: value` (holds the attribute name and its value, in this order)
    Allow(String, String),
}
225
226impl AdocAttr {
227    pub fn name(&self) -> &str {
228        match self {
229            AdocAttr::Deny(name) => name,
230            AdocAttr::Allow(name, _value) => name,
231        }
232    }
233
234    pub fn value(&self) -> Option<&str> {
235        match self {
236            AdocAttr::Deny(_name) => None,
237            AdocAttr::Allow(_name, value) => Some(value),
238        }
239    }
240}
241
242/// Constructors
243impl AdocAttr {
244    /// "name" -> Deny("name")
245    pub fn deny(name: impl Into<String>) -> Self {
246        AdocAttr::Deny(name.into())
247    }
248
249    /// "name", "value"
250    pub fn allow(name: impl Into<String>, value: impl Into<String>) -> Self {
251        AdocAttr::Allow(name.into(), value.into())
252    }
253
254    /// "name" -> Allow("attr") | Deny("attr")
255    pub fn from_name(name: &str) -> Self {
256        if name.starts_with('!') {
257            Self::deny(&name[1..])
258        } else {
259            Self::allow(name, "")
260        }
261    }
262}
263
/// Asciidoctor metadata supplied to Handlebars data
///
/// We have to extract them manually because `asciidoctor --embedded` doesn't generate document
/// title and header.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AdocMetadata {
    /// Document title (text of the `= Title` line), if there is one
    pub title: Option<String>,
    /// Attributes of this metadata, in the order they were extracted
    attrs: Vec<AdocAttr>,
    // TODO: supply base attribute set from `book.ron`
    /// Fallback metadata consulted when an attribute is not found in `attrs`
    base: Option<Box<Self>>,
}
275
276impl AdocMetadata {
277    /// Tries to find an attribute with name. Duplicates are not conisdered
278    pub fn find_attr(&self, name: &str) -> Option<&AdocAttr> {
279        // from self
280        if let Some(attr) = self.attrs.iter().find(|a| a.name() == name) {
281            return Some(attr);
282        }
283
284        // from base
285        if let Some(ref base) = self.base {
286            return base.find_attr(name);
287        }
288
289        None
290    }
291}
292
293/// Parsers
294impl AdocMetadata {
295    /// Sets the fallback [`AdocMetadata`]
296    pub fn derive(&mut self, base: Self) {
297        self.base = Some(Box::new(base));
298    }
299
300    /// Extracts metadata from AsciiDoc string and sets up fallback attributes from `asciidoctor`
301    /// command line options
302    pub fn extract_with_base(adoc_text: &str, acx: &AdocRunContext) -> Self {
303        let mut meta = Self::extract(adoc_text, acx);
304
305        let base = Self::from_cmd_opts(&acx.opts, acx);
306        meta.derive(base);
307
308        meta
309    }
310
311    /// "Whitespace" line or comment lines are skipped when extracting header and attributes
312    fn is_line_to_skip(ln: &str) -> bool {
313        let ln = ln.trim();
314        ln.is_empty() || ln.starts_with("//")
315    }
316
317    /// Extracts metadata from AsciiDoc string
318    ///
319    /// Replaces placeholder strings in attribute values.
320    pub fn extract(text: &str, acx: &AdocRunContext) -> Self {
321        let mut lines = text
322            .lines()
323            .filter(|ln| !Self::is_line_to_skip(ln))
324            .peekable();
325
326        // = Title
327        let title = match lines.peek() {
328            Some(ln) if ln.starts_with("= ") => {
329                let ln = lines.next().unwrap();
330                Some(ln[2..].trim().to_string())
331            }
332            _ => None,
333        };
334
335        // :attribute: value
336        let mut attrs = Vec::with_capacity(10);
337        while let Some(line_str) = lines.next() {
338            // locate two colons (`:`)
339            let mut colons = line_str.bytes().enumerate().filter(|(_i, c)| *c == b':');
340
341            // first `:`
342            match colons.next() {
343                // line starting with `:`
344                Some((ix, _c)) if ix == 0 => {}
345                // line not starting with `:`
346                Some((_ix, _c)) => continue,
347                None => break,
348            }
349
350            // second `:`
351            let pos = match colons.next() {
352                Some((i, _c)) => i,
353                None => continue,
354            };
355
356            use std::str::from_utf8;
357            let line = line_str.as_bytes();
358
359            // :attribute: value
360            let name = match from_utf8(&line[1..pos]) {
361                Ok(name) => name.trim(),
362                Err(_err) => {
363                    eprintln!("Bug! AdocMetadata error line: {}", line_str);
364                    continue;
365                }
366            };
367
368            let value = match from_utf8(&line[pos + 1..]) {
369                Ok(v) => v.trim(),
370                Err(_err) => {
371                    eprintln!("Bug! AdocMetadata error line: {}", line_str);
372                    continue;
373                }
374            };
375
376            if name.starts_with('!') {
377                // :!attribute:
378                attrs.push(AdocAttr::deny(&name[1..]));
379            } else {
380                // :attribute: value
381                let value = acx.replace_placeholder_strings(value);
382                attrs.push(AdocAttr::allow(name, value));
383            }
384        }
385
386        Self {
387            title,
388            attrs,
389            base: None,
390        }
391    }
392
393    /// Extracts `asciidoctor` options that matches to `-a attr=value`
394    pub fn from_cmd_opts(opts: &CmdOptions, acx: &AdocRunContext) -> Self {
395        let attr_opts = match opts.iter().find(|(opt_name, _attr_opts)| opt_name == "-a") {
396            Some((_opt_name, opts)) => opts,
397            None => {
398                return Self {
399                    title: None,
400                    attrs: vec![],
401                    base: None,
402                }
403            }
404        };
405
406        let mut attrs = Vec::with_capacity(10);
407
408        for opt in attr_opts.iter() {
409            let eq_pos = opt
410                .bytes()
411                .enumerate()
412                .find(|(_i, c)| *c == b'=')
413                .map(|(i, _c)| i)
414                .unwrap_or(0);
415
416            // attr | !attr
417            if eq_pos == 0 {
418                attrs.push(AdocAttr::from_name(opt));
419                continue;
420            }
421
422            // name=value | name@=value | name=value@
423            // we'll just ignore `@` symbols; different from the original Asciidoctor, attributes
424            // are always overridable by documents
425            let mut name = &opt[0..eq_pos];
426            if name.ends_with('@') {
427                name = &name[0..name.len() - 1];
428            }
429
430            let mut value = &opt[eq_pos + 1..];
431            if value.ends_with('@') {
432                value = &value[0..value.len() - 1];
433            }
434
435            let value = acx.replace_placeholder_strings(value);
436            attrs.push(AdocAttr::allow(name, &value));
437        }
438
439        Self {
440            title: None,
441            attrs,
442            base: None,
443        }
444    }
445}
446
#[cfg(test)]
mod test {
    use super::{AdocAttr, AdocMetadata, AdocRunContext};

    /// Sample document: title, attributes separated by blank and comment lines, then a paragraph
    const ARTICLE: &str = r###"
// ^ blank line

= Title here!

:revdate: Oct 23, 2020
// whitespace again

:author: someone
:!sectnums: these text are omitted

First paragraph!
"###;

    /// Dummy context that carries nothing but the given command line options
    fn dummy_acx(opts: Vec<(String, Vec<String>)>) -> AdocRunContext {
        AdocRunContext {
            src_dir: ".".to_string(),
            opts,
            base_url: "".to_string(),
        }
    }

    #[test]
    fn simple_metadata() {
        let acx = dummy_acx(vec![]);
        let metadata = AdocMetadata::extract(ARTICLE, &acx);

        let expected = AdocMetadata {
            title: Some("Title here!".to_string()),
            attrs: vec![
                AdocAttr::allow("revdate", "Oct 23, 2020"),
                AdocAttr::allow("author", "someone"),
                AdocAttr::deny("sectnums"),
            ],
            base: None,
        };
        assert_eq!(metadata, expected);

        let author = AdocAttr::allow("author", "someone");
        assert_eq!(metadata.find_attr("author"), Some(&author));
    }

    #[test]
    fn base_test() {
        let mail = "someone@mail.domain";

        // attributes given on the command line become the fallback
        let acx = dummy_acx(vec![(
            "-a".to_string(),
            vec!["sectnums".to_string(), format!("email={}", mail)],
        )]);

        let deriving = AdocMetadata::extract_with_base(ARTICLE, &acx);

        // the document's `:!sectnums:` wins over `-a sectnums`
        let sectnums = AdocAttr::deny("sectnums");
        assert_eq!(deriving.find_attr("sectnums"), Some(&sectnums));

        // `email` exists only on the command line
        let email = AdocAttr::allow("email", mail);
        assert_eq!(deriving.find_attr("email"), Some(&email));
    }
}