// mdcat/lib.rs

// Copyright 2018-2020 Sebastian Wiesner <sebastian@swsnr.de>

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
//! mdcat: render markdown to TTYs.
//!
//! This crate exposes both the command-line interface entry points and the core rendering
//! library (previously published as `pulldown-cmark-mdcat`). See [`push_tty`] for the main
//! library entry point, and [`process_file`] for the CLI-level helper that reads markdown
//! from a file and renders it to the given [`Output`].
//!
//! ## Features
//!
//! - `default` enables `svg` and `image-processing`.
//!
//! - `svg` includes support for rendering SVG images to PNG for terminals which do not support SVG
//!   images natively.  This feature adds a dependency on `resvg`.
//!
//! - `image-processing` enables processing of pixel images before rendering.  This feature adds
//!   a dependency on `image`.  If disabled mdcat will not be able to render inline images on some
//!   terminals, or render images incorrectly or at wrong sizes on other terminals.
//!
//!   Do not disable this feature unless you are sure that you won't use inline images, or accept
//!   incomplete rendering of images.  Please do not report issues with inline images with this
//!   feature disabled.
28
29#![deny(warnings, missing_docs, clippy::all)]
30#![forbid(unsafe_code)]
31
32use std::fs::File;
33use std::io::{stdin, BufWriter, Error, ErrorKind, Read, Result, Write};
34use std::path::{Path, PathBuf};
35
36use anyhow::Context;
37use gethostname::gethostname;
38use pulldown_cmark::{Event, Options, Parser};
39use syntect::parsing::SyntaxSet;
40use tracing::{event, instrument, Level};
41use url::Url;
42
43pub use crate::error::{RenderError, RenderResult};
44pub use crate::render::{NoopObserver, RenderObserver};
45pub use crate::resources::ResourceUrlHandler;
46pub use crate::terminal::capabilities::TerminalCapabilities;
47pub use crate::terminal::{Multiplexer, TerminalProgram, TerminalSize};
48pub use crate::theme::Theme;
49
50mod error;
51pub mod mdless;
52mod references;
53pub mod resources;
54pub mod terminal;
55mod theme;
56
57mod render;
58
59/// Argument parsing for mdcat.
60pub mod args;
61/// Shared CLI entry point for the `mdcat` and `mdless` binaries.
62pub mod cli;
63/// Output handling for mdcat.
64pub mod output;
65
66use crate::args::ResourceAccess;
67use crate::output::Output;
68use crate::resources::{CurlResourceHandler, DispatchingResourceHandler, FileResourceHandler};
69
/// Default read size limit for resources, in bytes.
///
/// Equals 100 MiB (`104_857_600` bytes).
pub static DEFAULT_RESOURCE_READ_LIMIT: u64 = 100 * 1024 * 1024;
72
73/// HTTP `User-Agent` header for remote resource fetches.
74const USER_AGENT: &str = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"));
75
76/// CommonMark + the GFM extensions mdcat renders natively.
77///
78/// CommonMark is the core spec. Task lists, strikethrough, and pipe
79/// tables come from GitHub Flavored Markdown. Smart punctuation
80/// replaces straight quotes and `--`/`...` with typographic
81/// equivalents at parse time. GFM alert blockquotes (`> [!NOTE]`,
82/// `> [!WARNING]`, …) are tagged with a [`pulldown_cmark::BlockQuoteKind`]
83/// that the renderer surfaces as a coloured label. Footnotes,
84/// definition lists, and wiki links are rendered inline with a
85/// matching bottom-of-document footnote section.
86pub fn markdown_options() -> Options {
87    Options::ENABLE_TASKLISTS
88        | Options::ENABLE_STRIKETHROUGH
89        | Options::ENABLE_TABLES
90        | Options::ENABLE_SMART_PUNCTUATION
91        | Options::ENABLE_GFM
92        | Options::ENABLE_FOOTNOTES
93        | Options::ENABLE_DEFINITION_LIST
94        | Options::ENABLE_WIKILINKS
95}
96
/// Settings for markdown rendering.
///
/// The syntax set is borrowed for `'a`; all other fields are held by value.
#[derive(Debug)]
pub struct Settings<'a> {
    /// Capabilities of the terminal mdcat writes to.
    pub terminal_capabilities: TerminalCapabilities,
    /// The size of the terminal mdcat writes to.
    pub terminal_size: TerminalSize,
    /// Detected terminal multiplexer (tmux/screen), if any.
    ///
    /// When non-`None`, image protocol output is wrapped in DCS passthrough
    /// so the multiplexer forwards it to the real terminal.
    pub multiplexer: Multiplexer,
    /// Syntax set for syntax highlighting of code blocks.
    pub syntax_set: &'a SyntaxSet,
    /// Colour theme for mdcat.
    pub theme: Theme,
    /// Whether to wrap code-block lines that exceed the terminal width
    /// instead of letting them overflow the right border.
    pub wrap_code: bool,
}
117
118/// The environment to render markdown in.
119#[derive(Debug)]
120pub struct Environment {
121    /// The base URL to resolve relative URLs with.
122    pub base_url: Url,
123    /// The local host name.
124    pub hostname: String,
125}
126
127impl Environment {
128    /// Create an environment for the local host with the given `base_url`.
129    ///
130    /// Take the local hostname from `gethostname`.
131    pub fn for_localhost(base_url: Url) -> Result<Self> {
132        gethostname()
133            .into_string()
134            .map_err(|raw| {
135                Error::new(
136                    ErrorKind::InvalidData,
137                    format!("gethostname() returned invalid unicode data: {raw:?}"),
138                )
139            })
140            .map(|hostname| Environment { base_url, hostname })
141    }
142
143    /// Create an environment for a local directory.
144    ///
145    /// Convert the directory to a directory URL, and obtain the hostname from `gethostname`.
146    ///
147    /// `base_dir` must be an absolute path; return an IO error with `ErrorKind::InvalidInput`
148    /// otherwise.
149    pub fn for_local_directory<P: AsRef<Path>>(base_dir: &P) -> Result<Self> {
150        Url::from_directory_path(base_dir)
151            .map_err(|()| {
152                Error::new(
153                    ErrorKind::InvalidInput,
154                    format!(
155                        "Base directory {} must be an absolute path",
156                        base_dir.as_ref().display()
157                    ),
158                )
159            })
160            .and_then(Self::for_localhost)
161    }
162}
163
164/// Write markdown to a TTY.
165///
166/// Iterate over the Markdown AST `events`, format each one, and send the
167/// result to `writer` using `settings` and `environment` to drive styling
168/// and resource access.
169///
170/// `push_tty` tries to limit output to the configured terminal columns,
171/// but does not guarantee staying within the column limit (long words,
172/// images, and inline code can overflow).
173///
174/// Delegates to [`push_tty_with_observer`] with a [`NoopObserver`]. Callers
175/// that need structural information about the rendered output — heading
176/// positions, link ranges, and so on — should use that variant directly.
177#[instrument(level = "debug", skip_all, fields(environment.hostname = environment.hostname.as_str(), environment.base_url = &environment.base_url.as_str()))]
178pub fn push_tty<'a, 'e, W, I>(
179    settings: &Settings,
180    environment: &Environment,
181    resource_handler: &dyn ResourceUrlHandler,
182    writer: &'a mut W,
183    events: I,
184) -> RenderResult<()>
185where
186    I: Iterator<Item = Event<'e>>,
187    W: Write,
188{
189    push_tty_with_observer(
190        settings,
191        environment,
192        resource_handler,
193        writer,
194        events,
195        &mut NoopObserver,
196    )
197}
198
199/// Render Markdown to a TTY while handing every event to an observer.
200///
201/// Same semantics as [`push_tty`]. On each iteration the observer is
202/// called with the output byte offset and the event about to be rendered,
203/// so the observer can build a side-table of structural positions (which
204/// heading starts at which output byte, where each link anchors, etc.).
205///
206/// The output writer is wrapped in a byte-counting adapter so the
207/// observer sees exact cursor positions even when the underlying writer
208/// performs partial writes.
209pub fn push_tty_with_observer<'a, 'e, W, I, O>(
210    settings: &Settings,
211    environment: &Environment,
212    resource_handler: &dyn ResourceUrlHandler,
213    writer: &'a mut W,
214    events: I,
215    observer: &mut O,
216) -> RenderResult<()>
217where
218    I: Iterator<Item = Event<'e>>,
219    W: Write,
220    O: RenderObserver + ?Sized,
221{
222    use render::*;
223
224    let mut counted = CountingWriter::new(writer);
225    let mut current = StateAndData(State::default(), StateData::default());
226    for event in events {
227        observer.on_event(counted.bytes(), &event);
228        let StateAndData(state, data) = current;
229        current = write_event(
230            &mut counted,
231            settings,
232            environment,
233            &resource_handler,
234            state,
235            data,
236            event,
237        )?;
238    }
239    let StateAndData(final_state, final_data) = current;
240    finish(&mut counted, settings, environment, final_state, final_data)
241}
242
243/// Read input for `filename`.
244///
245/// If `filename` is `-` read from standard input, otherwise try to open and
246/// read the given file.
247pub fn read_input<T: AsRef<str>>(filename: T) -> anyhow::Result<(PathBuf, String)> {
248    let cd = std::env::current_dir()?;
249    let mut buffer = String::new();
250
251    if filename.as_ref() == "-" {
252        stdin().read_to_string(&mut buffer)?;
253        Ok((cd, buffer))
254    } else {
255        let mut source = File::open(filename.as_ref())?;
256        source.read_to_string(&mut buffer)?;
257        let base_dir = cd
258            .join(filename.as_ref())
259            .parent()
260            .map(|p| p.to_path_buf())
261            .unwrap_or(cd);
262        Ok((base_dir, buffer))
263    }
264}
265
266/// Process a single file.
267///
268/// Read from `filename` and render the contents to `output`.
269#[instrument(skip(output, settings, resource_handler), level = "debug")]
270pub fn process_file(
271    filename: &str,
272    settings: &Settings,
273    access: ResourceAccess,
274    resource_handler: &dyn ResourceUrlHandler,
275    output: &mut Output,
276) -> anyhow::Result<()> {
277    let (base_dir, input) = read_input(filename)?;
278    event!(
279        Level::TRACE,
280        "Read input, using {} as base directory",
281        base_dir.display()
282    );
283    let env = Environment::for_local_directory(&base_dir)?;
284    // Collect the event stream so the remote-image prefetch can run
285    // before the render loop. On `--local` runs (the default) the
286    // wrapper degenerates to a no-op passthrough.
287    let events: Vec<_> = Parser::new_ext(&input, markdown_options()).collect();
288    let caching = match access {
289        ResourceAccess::Remote => resources::prefetch_and_wrap(
290            &events,
291            &env,
292            USER_AGENT,
293            DEFAULT_RESOURCE_READ_LIMIT,
294            resource_handler,
295        ),
296        ResourceAccess::LocalOnly => {
297            resources::CachingResourceHandler::passthrough(resource_handler)
298        }
299    };
300    let resource_handler: &dyn ResourceUrlHandler = &caching;
301
302    let mut sink = BufWriter::new(output.writer());
303    let outcome = push_tty(
304        settings,
305        &env,
306        resource_handler,
307        &mut sink,
308        events.into_iter(),
309    )
310    .and_then(|()| {
311        event!(Level::TRACE, "Finished rendering, flushing output");
312        sink.flush().map_err(RenderError::from)
313    });
314    match outcome {
315        Ok(()) => Ok(()),
316        Err(RenderError::Io(ref io)) if io.kind() == ErrorKind::BrokenPipe => {
317            event!(Level::TRACE, "Ignoring broken pipe");
318            Ok(())
319        }
320        Err(error) => {
321            event!(Level::ERROR, ?error, "Failed to process file: {:#}", error);
322            Err(error.into())
323        }
324    }
325}
326
327/// Create the resource handler for mdcat.
328pub fn create_resource_handler(
329    access: ResourceAccess,
330) -> anyhow::Result<DispatchingResourceHandler> {
331    let mut resource_handlers: Vec<Box<dyn ResourceUrlHandler>> = vec![Box::new(
332        FileResourceHandler::new(DEFAULT_RESOURCE_READ_LIMIT),
333    )];
334    if let ResourceAccess::Remote = access {
335        // libcurl's process-wide init runs here, not at CLI entry, so
336        // `--local` invocations skip the cost entirely.
337        curl::init();
338        event!(target: "mdcat::main", Level::DEBUG, "HTTP client with user agent {USER_AGENT}");
339        let client = CurlResourceHandler::create(DEFAULT_RESOURCE_READ_LIMIT, USER_AGENT)
340            .context("build HTTP client")?;
341        resource_handlers.push(Box::new(client));
342    }
343    Ok(DispatchingResourceHandler::new(resource_handlers))
344}
345
346#[cfg(test)]
347mod tests {
348    use pulldown_cmark::Parser;
349
350    use crate::resources::NoopResourceHandler;
351
352    use super::*;
353
354    mod observer {
355        use pulldown_cmark::{Event, Options, Parser, Tag};
356
357        use super::*;
358
359        /// Observer that records the `(kind, byte_offset)` of each
360        /// interesting structural event. Used only in tests.
361        #[derive(Default)]
362        struct Recorder {
363            entries: Vec<(String, u64)>,
364        }
365
366        impl RenderObserver for Recorder {
367            fn on_event(&mut self, byte_offset: u64, event: &Event<'_>) {
368                let kind = match event {
369                    Event::Start(Tag::Heading { level, .. }) => format!("start_h{}", *level as u8),
370                    Event::End(end) => format!("end_{end:?}"),
371                    Event::Text(t) => format!("text:{t}"),
372                    _ => return,
373                };
374                self.entries.push((kind, byte_offset));
375            }
376        }
377
378        #[test]
379        fn observer_sees_heading_events_with_increasing_offsets() {
380            let markdown = "# First\n\n## Second\n\nbody\n";
381            let parser = Parser::new_ext(markdown, Options::empty());
382            let mut sink: Vec<u8> = Vec::new();
383            let env =
384                Environment::for_local_directory(&std::env::current_dir().expect("cwd available"))
385                    .expect("env");
386            let settings = Settings {
387                terminal_capabilities: TerminalProgram::Dumb.capabilities(),
388                terminal_size: TerminalSize::default(),
389                multiplexer: Multiplexer::default(),
390                syntax_set: &SyntaxSet::default(),
391                theme: Theme::default(),
392                wrap_code: false,
393            };
394            let mut recorder = Recorder::default();
395
396            push_tty_with_observer(
397                &settings,
398                &env,
399                &NoopResourceHandler,
400                &mut sink,
401                parser,
402                &mut recorder,
403            )
404            .expect("render");
405
406            // We expect to see both headings appear in order, the H1 before
407            // the H2, and each heading's offset must be <= the offset of
408            // the heading that follows it.
409            let headings: Vec<_> = recorder
410                .entries
411                .iter()
412                .filter(|(k, _)| k.starts_with("start_h"))
413                .collect();
414            assert_eq!(
415                headings.len(),
416                2,
417                "saw {} headings in {:?}",
418                headings.len(),
419                recorder.entries
420            );
421            assert_eq!(headings[0].0, "start_h1");
422            assert_eq!(headings[1].0, "start_h2");
423            assert!(
424                headings[0].1 <= headings[1].1,
425                "H1 offset {} should not exceed H2 offset {}",
426                headings[0].1,
427                headings[1].1
428            );
429        }
430
431        #[test]
432        fn noop_observer_matches_plain_push_tty_byte_for_byte() {
433            let markdown = "# Title\n\nSome *emphasis* and `code`.\n\n- item one\n- item two\n";
434            let env =
435                Environment::for_local_directory(&std::env::current_dir().expect("cwd available"))
436                    .expect("env");
437            let settings = Settings {
438                terminal_capabilities: TerminalProgram::Ansi.capabilities(),
439                terminal_size: TerminalSize::default(),
440                multiplexer: Multiplexer::default(),
441                syntax_set: &SyntaxSet::default(),
442                theme: Theme::default(),
443                wrap_code: false,
444            };
445
446            let mut plain: Vec<u8> = Vec::new();
447            push_tty(
448                &settings,
449                &env,
450                &NoopResourceHandler,
451                &mut plain,
452                Parser::new_ext(markdown, Options::empty()),
453            )
454            .expect("plain");
455
456            let mut observed: Vec<u8> = Vec::new();
457            push_tty_with_observer(
458                &settings,
459                &env,
460                &NoopResourceHandler,
461                &mut observed,
462                Parser::new_ext(markdown, Options::empty()),
463                &mut NoopObserver,
464            )
465            .expect("observed");
466
467            // The observer hook must not perturb rendering; the two paths
468            // produce identical output.
469            assert_eq!(plain, observed);
470        }
471    }
472
473    fn render_string(input: &str, settings: &Settings) -> RenderResult<String> {
474        let source = Parser::new(input);
475        let mut sink = Vec::new();
476        let env =
477            Environment::for_local_directory(&std::env::current_dir().expect("Working directory"))?;
478        push_tty(settings, &env, &NoopResourceHandler, &mut sink, source)?;
479        Ok(String::from_utf8_lossy(&sink).into())
480    }
481
482    fn render_string_dumb(markup: &str) -> RenderResult<String> {
483        render_string(
484            markup,
485            &Settings {
486                syntax_set: &SyntaxSet::default(),
487                terminal_capabilities: TerminalProgram::Dumb.capabilities(),
488                terminal_size: TerminalSize::default(),
489                multiplexer: Multiplexer::default(),
490                theme: Theme::default(),
491                wrap_code: false,
492            },
493        )
494    }
495
    // Layout tests, all rendered through `render_string_dumb`. Most compare
    // against insta snapshots, so the inputs and the asserted expressions
    // are left exactly as they are.
    mod layout {
        use super::render_string_dumb;
        use insta::assert_snapshot;

        /// Emphasis and strong markup render as plain words followed by a
        /// single newline. The name presumably refers to GitHub issue 49.
        #[test]
        #[allow(non_snake_case)]
        fn GH_49_format_no_colour_simple() {
            assert_eq!(
                render_string_dumb("_lorem_ **ipsum** dolor **sit** _amet_").unwrap(),
                "lorem ipsum dolor sit amet\n",
            )
        }

        /// Snapshot of a document consisting only of a thematic break.
        #[test]
        fn begins_with_rule() {
            assert_snapshot!(render_string_dumb("----").unwrap())
        }

        /// Snapshot of a document consisting only of a block quote.
        #[test]
        fn begins_with_block_quote() {
            assert_snapshot!(render_string_dumb("> Hello World").unwrap());
        }

        /// Snapshot of a block quote paragraph followed by a quoted thematic break.
        #[test]
        fn rule_in_block_quote() {
            assert_snapshot!(render_string_dumb(
                "> Hello World

> ----"
            )
            .unwrap());
        }

        /// Snapshot of a block quote paragraph followed by a quoted heading.
        #[test]
        fn heading_in_block_quote() {
            assert_snapshot!(render_string_dumb(
                "> Hello World

> # Hello World"
            )
            .unwrap())
        }

        /// Snapshot of first-, second-, and third-level headings in sequence.
        #[test]
        fn heading_levels() {
            assert_snapshot!(render_string_dumb(
                "
# First

## Second

### Third"
            )
            .unwrap())
        }

        /// An autolink renders as its bare URL with nothing appended after it.
        #[test]
        fn autolink_creates_no_reference() {
            assert_eq!(
                render_string_dumb("Hello <http://example.com>").unwrap(),
                "Hello http://example.com\n"
            )
        }

        /// Snapshot of a link inside a block quote, followed by a quoted
        /// heading and then a top-level heading. Judging by the test name and
        /// the input text, pending link references are expected to be written
        /// before the top-level heading only.
        #[test]
        fn flush_ref_links_before_toplevel_heading() {
            assert_snapshot!(render_string_dumb(
                "> Hello [World](http://example.com/world)

> # No refs before this headline

# But before this"
            )
            .unwrap())
        }

        /// Snapshot of two linked paragraphs separated by a heading, the
        /// second link being the last element of the document.
        #[test]
        fn flush_ref_links_at_end() {
            assert_snapshot!(render_string_dumb(
                "Hello [World](http://example.com/world)

# Headline

Hello [Donald](http://example.com/Donald)"
            )
            .unwrap())
        }
    }
584
    // Tests for input that uses markdown features these tests render without
    // special support.
    mod disabled_features {
        use insta::assert_snapshot;

        use super::render_string_dumb;

        /// Rendering footnote-like input must succeed (the result is
        /// unwrapped) and match the stored snapshot. The name presumably
        /// refers to GitHub issue 155.
        ///
        /// NOTE(review): the definition line has no closing `]`; it is left
        /// as-is because the snapshot depends on the exact input.
        #[test]
        #[allow(non_snake_case)]
        fn GH_155_do_not_choke_on_footnotes() {
            assert_snapshot!(render_string_dumb(
                "A footnote [^1]

[^1: We do not support footnotes."
            )
            .unwrap())
        }
    }
601}