Skip to main content

mdcat/mdless/
buffer.rs

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
//! In-memory document for the interactive view.
//!
//! [`RenderedDoc`] holds the styled bytes, an ANSI-stripped plain
//! copy, parallel line-start indexes, and a heading list. Built
//! once from `push_tty` output plus a [`HeadingRecorder`].
10
11use pulldown_cmark::{Event, HeadingLevel, Tag, TagEnd};
12
13use crate::RenderObserver;
14
/// A single heading captured from the rendered document.
#[derive(Clone, Debug)]
pub struct HeadingEntry {
    /// Heading depth: `1` for `#` up to `6` for `######`.
    pub level: u8,
    /// Inline text of the heading, concatenated into one string.
    pub text: String,
    /// Byte offset into the plain buffer at which the heading line starts.
    pub plain_offset: usize,
}
25
26/// Pre-rendered markdown document with searchable plain copy + line index.
27///
28/// Built once per input; scrolled and searched interactively.
29#[derive(Debug)]
30pub struct RenderedDoc {
31    /// Styled bytes from `push_tty`, including SGR + OSC 8 sequences.
32    pub styled: Vec<u8>,
33    /// ANSI-stripped view of `styled`; search corpus.
34    pub plain: String,
35    /// Byte offsets where each line begins in `plain`. Final entry equals
36    /// `plain.len()` so line spans are always `starts[i..i+1]`.
37    pub line_starts: Vec<usize>,
38    /// Byte offsets where each line begins in `styled`. Parallel to
39    /// `line_starts`; identical length.
40    pub styled_line_starts: Vec<usize>,
41    /// Headings in source order as recorded by [`HeadingRecorder`].
42    pub headings: Vec<HeadingEntry>,
43}
44
45impl RenderedDoc {
46    /// Rendered line count, always at least 1.
47    pub fn line_count(&self) -> usize {
48        self.line_starts.len().saturating_sub(1).max(1)
49    }
50
51    /// Styled bytes for rendered line `n`, including its trailing `\n`.
52    ///
53    /// Returns an empty slice when `n` is past the end.
54    pub fn styled_line(&self, n: usize) -> &[u8] {
55        self.styled_line_starts
56            .get(n..=n + 1)
57            .map_or(&[][..], |range| &self.styled[range[0]..range[1]])
58    }
59
60    /// Rendered line index containing `offset` in the plain buffer.
61    pub fn line_for_plain_offset(&self, offset: usize) -> usize {
62        match self.line_starts.binary_search(&offset) {
63            Ok(i) => i,
64            Err(i) => i.saturating_sub(1),
65        }
66    }
67}
68
69/// Assemble a [`RenderedDoc`] from the styled output + recorded headings.
70pub fn build(styled: Vec<u8>, headings: Vec<HeadingEntry>) -> RenderedDoc {
71    let plain = strip_ansi(&styled);
72    let (line_starts, styled_line_starts) = index_lines(&styled, &plain);
73    RenderedDoc {
74        styled,
75        plain,
76        line_starts,
77        styled_line_starts,
78        headings,
79    }
80}
81
82/// [`RenderObserver`] that collects heading starts with their text.
83///
84/// pulldown-cmark emits `Start(Heading)` → inline events → `End(Heading)`.
85/// The recorder accumulates text between start and end, then pushes a
86/// finalised [`HeadingEntry`] on the closing event.
87#[derive(Default)]
88pub struct HeadingRecorder {
89    pending: Option<PendingHeading>,
90    done: Vec<HeadingEntry>,
91}
92
/// Scratch state for a heading whose `End(Heading)` has not been seen yet.
struct PendingHeading {
    /// Numeric heading level taken from the start event.
    level: u8,
    /// Offset reported by the observer callback for the start event.
    plain_offset: u64,
    /// Inline text collected so far.
    text: String,
}
99
100impl HeadingRecorder {
101    /// Drop the recorder, returning the recorded entries.
102    pub fn finish(self) -> Vec<HeadingEntry> {
103        self.done
104    }
105}
106
107impl RenderObserver for HeadingRecorder {
108    fn on_event(&mut self, byte_offset: u64, event: &Event<'_>) {
109        match event {
110            Event::Start(Tag::Heading { level, .. }) => {
111                self.pending = Some(PendingHeading {
112                    level: heading_level_to_u8(*level),
113                    plain_offset: byte_offset,
114                    text: String::new(),
115                });
116            }
117            Event::End(TagEnd::Heading(_)) => {
118                if let Some(p) = self.pending.take() {
119                    self.done.push(HeadingEntry {
120                        level: p.level,
121                        text: p.text.trim().to_string(),
122                        plain_offset: p.plain_offset as usize,
123                    });
124                }
125            }
126            Event::Text(s) | Event::Code(s) => {
127                if let Some(p) = self.pending.as_mut() {
128                    p.text.push_str(s);
129                }
130            }
131            _ => {}
132        }
133    }
134}
135
136/// Map pulldown-cmark's [`HeadingLevel`] to the 1-6 numeric range.
137fn heading_level_to_u8(level: HeadingLevel) -> u8 {
138    level as u8
139}
140
/// Index of the first byte after the escape sequence that begins with the
/// ESC at `input[start]`.
///
/// The caller is expected to have checked that `input[start]` is ESC
/// (`0x1b`); this function only inspects the bytes that follow it.
///
/// Recognised shapes:
///
/// * CSI (`ESC [ ... final`), where `final` is one byte in `0x40..=0x7e`.
/// * OSC / DCS / APC / PM (`ESC ] | P | _ | ^`), terminated by BEL or by
///   ST (`ESC \`).
/// * Any other two-byte escape whose second byte is printable ASCII
///   (`0x20..=0x7e`).
///
/// When ESC is followed by a control byte or a non-ASCII byte, only the ESC
/// itself is skipped. This keeps a following newline, a following ESC (which
/// may start a real sequence), and multi-byte UTF-8 characters intact.
///
/// A truncated sequence consumes everything up to the end of `input`, and a
/// lone ESC as the final byte consumes just itself. The result is always
/// greater than `start`, so a scanning loop that assigns it back makes
/// progress.
fn skip_escape(input: &[u8], start: usize) -> usize {
    let Some(&next) = input.get(start + 1) else {
        // ESC is the last byte of the input.
        return start + 1;
    };
    match next {
        b'[' => {
            // CSI parameters + intermediates, then one final byte 0x40..=0x7e.
            let mut i = start + 2;
            while input.get(i).is_some_and(|&b| !(0x40..=0x7e).contains(&b)) {
                i += 1;
            }
            // Step over the final byte unless the sequence was cut short.
            i + usize::from(i < input.len())
        }
        b']' | b'P' | b'_' | b'^' => {
            // OSC / DCS / APC / PM, terminated by BEL or ST.
            let mut i = start + 2;
            while let Some(&b) = input.get(i) {
                if b == 0x07 {
                    return i + 1;
                }
                if b == 0x1b && input.get(i + 1) == Some(&b'\\') {
                    return i + 2;
                }
                i += 1;
            }
            // No terminator: the string runs to the end of the input.
            i
        }
        // Unknown two-byte escape: ESC plus one printable ASCII byte.
        0x20..=0x7e => start + 2,
        // Control or non-ASCII byte: not part of an escape, leave it alone.
        _ => start + 1,
    }
}
177
178/// Remove ANSI control sequences and return the decoded text.
179fn strip_ansi(input: &[u8]) -> String {
180    let mut out = Vec::with_capacity(input.len());
181    let mut i = 0;
182    while i < input.len() {
183        if input[i] == 0x1b {
184            i = skip_escape(input, i);
185        } else {
186            out.push(input[i]);
187            i += 1;
188        }
189    }
190    // Input is UTF-8 markdown with ASCII-only escapes; stripping keeps it valid.
191    String::from_utf8(out)
192        .unwrap_or_else(|err| String::from_utf8_lossy(err.as_bytes()).into_owned())
193}
194
195/// Line-start offsets in both buffers, parallel by index.
196///
197/// Final entries are `(plain.len(), styled.len())` so callers can always
198/// slice `line_starts[i..=i+1]` for line `i`.
199fn index_lines(styled: &[u8], plain: &str) -> (Vec<usize>, Vec<usize>) {
200    let mut plain_starts = vec![0];
201    let mut styled_starts = vec![0];
202    let (mut p, mut s) = (0usize, 0usize);
203
204    while s < styled.len() {
205        if styled[s] == 0x1b {
206            s = skip_escape(styled, s);
207            continue;
208        }
209        if styled[s] == b'\n' {
210            s += 1;
211            p += 1;
212            plain_starts.push(p);
213            styled_starts.push(s);
214            continue;
215        }
216        s += 1;
217        p += 1;
218    }
219
220    if *plain_starts.last().unwrap() != plain.len() {
221        plain_starts.push(plain.len());
222        styled_starts.push(styled.len());
223    }
224    (plain_starts, styled_starts)
225}
226
#[cfg(test)]
mod tests {
    use super::*;

    /// SGR sequences vanish and the text between them is kept.
    #[test]
    fn strips_sgr_and_preserves_text() {
        let plain = strip_ansi(b"\x1b[1mbold\x1b[0m plain \x1b[34mblue\x1b[0m");
        assert_eq!(plain, "bold plain blue");
    }

    /// OSC 8 hyperlink wrappers are removed, leaving only the label.
    #[test]
    fn strips_osc8_hyperlinks() {
        let plain = strip_ansi(b"\x1b]8;;https://example.com\x1b\\label\x1b]8;;\x1b\\");
        assert_eq!(plain, "label");
    }

    /// Newlines pass through untouched; line indexing depends on them.
    #[test]
    fn preserves_newlines_for_line_indexing() {
        let input: &[u8] = b"line one\nline two\n";
        assert_eq!(strip_ansi(input), "line one\nline two\n");
    }

    /// Three newline-terminated lines, the first wrapped in SGR codes.
    #[test]
    fn build_indexes_three_lines() {
        let doc = build(b"\x1b[1malpha\x1b[0m\nbeta\ngamma\n".to_vec(), Vec::new());
        assert_eq!(doc.plain, "alpha\nbeta\ngamma\n");
        assert_eq!(doc.line_count(), 3);
        // "alpha\n" ends at 6, "beta\n" at 11, "gamma\n" at 17 (end of text).
        assert_eq!(doc.line_starts, vec![0, 6, 11, 17]);
        // Line 1 carries no escapes, so styled and plain bytes agree.
        assert_eq!(doc.styled_line(1), b"beta\n");
        // Line 0 keeps its SGR codes in the styled view.
        assert_eq!(doc.styled_line(0), b"\x1b[1malpha\x1b[0m\n");
    }

    /// Offsets map back to the line that contains them.
    #[test]
    fn line_lookup_round_trips() {
        // "one\n" covers 0..=3, "two\n" covers 4..=7, "three\n" covers
        // 8..=13, giving line_starts = [0, 4, 8, 14].
        let doc = build(b"one\ntwo\nthree\n".to_vec(), Vec::new());
        let cases = [
            (0, 0),   // first byte
            (3, 0),   // the newline still belongs to line 0
            (4, 1),   // start of "two"
            (8, 2),   // start of "three"
            (100, 3), // past the end clamps to the final entry
        ];
        for (offset, line) in cases {
            assert_eq!(doc.line_for_plain_offset(offset), line, "offset {offset}");
        }
    }
}