Skip to main content

live_markdown/
lib.rs

1#[cfg(feature = "python")]
2use pyo3::prelude::*;
3
4use regex::Regex;
5use std::collections::HashSet;
6use std::thread;
7use std::time::Duration;
8use termion::{color, cursor, style};
9
10// Syntax highlighting imports
11use syntect::easy::HighlightLines;
12use syntect::highlighting::{Style, ThemeSet};
13use syntect::parsing::SyntaxSet;
14use syntect::util::{LinesWithEndings, as_24_bit_terminal_escaped};
15
/// Core engine for streaming Markdown to a terminal with live formatting.
/// Handles word wrapping, syntax highlighting for code blocks, and nested block styles.
///
/// Input arrives as raw bytes via `terminal_stream`, is parsed by
/// `process_buffer`, and the resulting ANSI text is collected with
/// `read_available`. Call `finish` once the stream has ended.
pub struct MarkdownStreamerCore {
    /// Decoded input text that has not been parsed yet
    buffer: String,
    // Internal byte buffer to handle fragmented UTF-8 streams
    byte_cache: Vec<u8>,
    /// Rendered terminal output waiting to be taken by `read_available`
    output_buffer: String,
    /// Number of fenced blocks currently open (0 = outside any block)
    code_block_depth: usize,
    /// Languages of the enclosing blocks; pushed on open, popped on close
    depth_stack: Vec<String>,
    /// Lower-cased language token of the innermost open block
    current_code_lang: String,
    /// Characters received so far for the code line being rendered
    current_code_line: String,
    /// Completed lines of the current code block, replayed to rebuild highlighter state
    code_history: String,
    /// True until a visible character has been consumed on the current line
    at_line_start: bool,
    /// Inline styles currently switched on ("bold", "italic", "code")
    active_styles: HashSet<String>,
    /// Word being assembled (including embedded style escapes) before it is wrapped and emitted
    word_buffer: String,
    /// Column bookkeeping used for word wrapping and line redraws
    line_pos: u16,
    /// Terminal width in columns, sampled once in `new`
    term_width: u16,
    /// Syntax definitions used for code highlighting
    ps: SyntaxSet,
    /// Themes used for code highlighting
    ts: ThemeSet,
    /// Delay between processing characters for a "smooth" typewriter effect
    render_delay: Duration,
    /// Minimum buffer size before starting the initial render
    initial_buffer_threshold: usize,
    /// Flag to track if the initial buffer has been cleared
    initial_buffer_passed: bool,
}
42
43impl MarkdownStreamerCore {
    /// Background colors for nested code blocks (ANSI 256-color palette).
    ///
    /// Entry `i` is the background for nesting depth `i + 1`; `get_block_bg`
    /// clamps deeper levels to the last entry.
    const BGS: &'static [&'static str] = &[
        "\x1b[48;5;235m",
        "\x1b[48;5;237m",
        "\x1b[48;5;239m",
        "\x1b[48;5;241m",
    ];
51
52    pub fn new() -> Self {
53        let (width, _) = termion::terminal_size().unwrap_or((80, 24));
54        Self {
55            buffer: String::new(),
56            byte_cache: Vec::new(),
57            output_buffer: String::new(),
58            code_block_depth: 0,
59            depth_stack: Vec::new(),
60            current_code_lang: String::new(),
61            current_code_line: String::new(),
62            code_history: String::new(),
63            at_line_start: true,
64            active_styles: HashSet::new(),
65            word_buffer: String::new(),
66            line_pos: 0,
67            term_width: width,
68            ps: SyntaxSet::load_defaults_newlines(),
69            ts: ThemeSet::load_defaults(),
70            render_delay: Duration::from_millis(0),
71            initial_buffer_threshold: 20,
72            initial_buffer_passed: false,
73        }
74    }
75
76    /// Pulls processed terminal output from the internal buffer.
77    pub fn read_available(&mut self) -> String {
78        std::mem::take(&mut self.output_buffer)
79    }
80
81    fn get_block_bg(&self) -> String {
82        if self.code_block_depth == 0 {
83            return String::new();
84        }
85        let idx = (self.code_block_depth - 1).min(Self::BGS.len() - 1);
86        Self::BGS[idx].to_string()
87    }
88
89    fn apply_indentation(&mut self) {
90        if self.code_block_depth == 0 {
91            return;
92        }
93        let move_len = (self.code_block_depth as u16 - 1) * 4;
94        if move_len > 0 {
95            self.output_buffer
96                .push_str(&format!("{}", cursor::Right(move_len)));
97            self.line_pos += move_len;
98        }
99    }
100
101    /// Transforms raw code text into syntax-highlighted ANSI strings.
102    fn apply_highlighting(&self, text: &str, lang: &str) -> String {
103        let bg = self.get_block_bg();
104        if lang == "markdown" || (text.is_empty() && self.code_block_depth > 0) {
105            return format!("{}{}\x1b[K", bg, text);
106        }
107
108        let syntax = self
109            .ps
110            .find_syntax_by_token(lang)
111            .unwrap_or_else(|| self.ps.find_syntax_plain_text());
112        let mut h = HighlightLines::new(syntax, &self.ts.themes["base16-ocean.dark"]);
113
114        // We use code history to maintain highlighting state across incremental chunks
115        let mut full_content = self.code_history.clone();
116        full_content.push_str(text);
117
118        let mut result = String::new();
119        for line in LinesWithEndings::from(&full_content) {
120            let ranges: Vec<(Style, &str)> = h.highlight_line(line, &self.ps).unwrap();
121            result = as_24_bit_terminal_escaped(&ranges[..], false);
122        }
123
124        // Clean up backgrounds and resets to ensure block background consistency
125        let clean_bg_regex = Regex::new(r"\x1b\[48;[0-9;]*m").unwrap();
126        let stripped = clean_bg_regex.replace_all(&result, "");
127        let reset_regex = Regex::new(r"\x1b\[0?m").unwrap();
128        let no_resets = reset_regex.replace_all(&stripped, "");
129        let esc_regex = Regex::new(r"(\x1b\[[0-9;]*m)").unwrap();
130        let fixed_result = esc_regex.replace_all(&no_resets, format!("$1{}", bg));
131
132        format!("{}{}{}\x1b[K", bg, fixed_result, bg)
133    }
134
135    /// Handles terminal cursor movement to overwrite lines during live updates.
136    fn clear_and_move_up(&mut self, text: String) {
137        let re = Regex::new(r"\x1b\[[0-9;]*m").unwrap();
138        let stripped = re.replace_all(&text, "");
139        let total_len = stripped.chars().count() as u16;
140
141        // Calculate visual line wrap count
142        let extra_rows = if total_len > 0 {
143            (self.line_pos + total_len - 1) / self.term_width
144        } else {
145            0
146        };
147
148        self.output_buffer.push('\r');
149        for _ in 0..extra_rows {
150            self.output_buffer.push_str("\x1b[K"); // Clear current line
151            self.output_buffer.push_str("\x1b[A"); // Move up
152        }
153
154        self.output_buffer.push_str("\x1b[K");
155        self.output_buffer.push('\r');
156
157        self.apply_indentation();
158    }
159
160    /// Flushes the current word buffer, handling word-wrap logic if the word exceeds terminal width.
161    fn flush_word(&mut self, prefix: &str) {
162        if self.word_buffer.is_empty() {
163            return;
164        }
165        let re = Regex::new(r"\x1b\[[0-9;]*m").unwrap();
166        let visible_word = re.replace_all(&self.word_buffer, "").into_owned();
167        let visible_len = visible_word.chars().count() as u16;
168
169        if self.line_pos + visible_len >= self.term_width - 1 {
170            let bg = self.get_block_bg();
171            self.output_buffer
172                .push_str(&format!("{}\x1b[K\n{}", bg, style::Reset));
173            self.line_pos = 0;
174            self.apply_indentation();
175            self.output_buffer.push_str(prefix);
176            if self.code_block_depth > 0 {
177                self.output_buffer.push_str("\x1b[K");
178            }
179        }
180        self.output_buffer.push_str(&self.word_buffer);
181        self.line_pos += visible_len;
182        self.word_buffer.clear();
183    }
184
185    fn toggle_style(&mut self, style_key: &str) {
186        let bg = self.get_block_bg();
187        if self.active_styles.contains(style_key) {
188            self.active_styles.remove(style_key);
189        } else {
190            self.active_styles.insert(style_key.to_string());
191        }
192        self.word_buffer.push_str(&format!("{}", style::Reset));
193        self.word_buffer.push_str(&bg);
194        for s in &self.active_styles {
195            match s.as_str() {
196                "bold" => self.word_buffer.push_str(&format!("{}", style::Bold)),
197                "italic" => self.word_buffer.push_str(&format!("{}", style::Italic)),
198                "code" => {
199                    self.word_buffer
200                        .push_str(&format!("{}", color::Fg(color::Yellow)));
201                    self.word_buffer.push_str("\x1b[48;5;238m");
202                }
203                _ => {}
204            }
205        }
206    }
207
208    /// Main loop for parsing the incoming Markdown stream.
209    pub fn process_buffer(&mut self, final_call: bool) {
210        let re_close = Regex::new(r"^[ \t]*```[ \t]*\n?").unwrap();
211        let re_open = Regex::new(r"^[ \t]*```([a-zA-Z0-9\-\+#]+)[ \t]*\n?").unwrap();
212        let re_hr = Regex::new(r"^[ \t]*(\-{3,}|\*{3,}|\_{3,})[ \t]*\n?").unwrap();
213
214        // Ensure we have enough data for a meaningful initial render
215        if !final_call && !self.initial_buffer_passed {
216            if self.buffer.chars().count() < self.initial_buffer_threshold {
217                return;
218            } else {
219                self.initial_buffer_passed = true;
220            }
221        }
222
223        while !self.buffer.is_empty() || (final_call && !self.word_buffer.is_empty()) {
224            if final_call && self.buffer.is_empty() && !self.word_buffer.is_empty() {
225                let bg = self.get_block_bg();
226                self.flush_word(&bg);
227                if self.buffer.is_empty() {
228                    break;
229                }
230            }
231
232            // LOOKAHEAD GUARD:
233            if !final_call && self.buffer.chars().count() < 15 {
234                if self.at_line_start
235                    || self.buffer.contains('`')
236                    || self.buffer.contains('*')
237                    || self.buffer.contains('_')
238                {
239                    break;
240                }
241            }
242
243            if !self.render_delay.is_zero() {
244                thread::sleep(self.render_delay);
245            }
246
247            let bg = self.get_block_bg();
248
249            // Handling inside Code Blocks
250            if self.code_block_depth > 0 && self.current_code_lang != "markdown" {
251                let close_match = re_close.find(&self.buffer).map(|m| m.end());
252                if let Some(end_idx) = close_match {
253                    if self.at_line_start {
254                        if !self.current_code_line.is_empty() {
255                            let line_content = self.current_code_line.clone();
256                            let lang = self.current_code_lang.clone();
257                            let line_hl = self.apply_highlighting(&line_content, &lang);
258                            self.clear_and_move_up(line_content);
259                            self.output_buffer.push_str(&line_hl);
260                        }
261                        self.output_buffer
262                            .push_str(&format!("{}{}", style::Reset, "\x1b[K"));
263                        self.code_block_depth -= 1;
264                        self.current_code_lang = self.depth_stack.pop().unwrap_or_default();
265                        self.current_code_line.clear();
266                        self.code_history.clear();
267                        self.buffer.drain(..end_idx);
268                        if self.code_block_depth > 0 {
269                            self.output_buffer
270                                .push_str(&format!("{}\x1b[K", self.get_block_bg()));
271                        }
272                        self.at_line_start = true;
273                        self.line_pos = 0;
274                        continue;
275                    }
276                }
277
278                let c = match self.buffer.chars().next() {
279                    Some(val) => val,
280                    None => break,
281                };
282                self.buffer.drain(..c.len_utf8());
283
284                if c == '\n' {
285                    let line_content = self.current_code_line.clone();
286                    let lang = self.current_code_lang.clone();
287                    let line_hl = self.apply_highlighting(&line_content, &lang);
288                    self.clear_and_move_up(line_content);
289                    self.output_buffer.push_str(&line_hl);
290                    self.output_buffer.push_str(&format!("{}\n", style::Reset));
291                    self.code_history.push_str(&self.current_code_line);
292                    self.code_history.push('\n');
293                    self.current_code_line.clear();
294                    self.at_line_start = true;
295                    self.line_pos = 0;
296                    self.output_buffer
297                        .push_str(&format!("{}\x1b[K", self.get_block_bg()));
298                } else {
299                    let line_content_before = self.current_code_line.clone();
300                    self.clear_and_move_up(line_content_before);
301                    self.current_code_line.push(c);
302                    let line_content_after = self.current_code_line.clone();
303                    let lang = self.current_code_lang.clone();
304                    let new_hl = self.apply_highlighting(&line_content_after, &lang);
305                    self.output_buffer.push_str(&new_hl);
306                    self.at_line_start = false;
307                }
308                continue;
309            }
310
311            // Handling Standard Markdown
312            if self.at_line_start {
313                if let Some(mat) = re_open.captures(&self.buffer.clone()) {
314                    let lang = mat.get(1).unwrap().as_str().to_lowercase();
315                    self.output_buffer
316                        .push_str(&format!("{}{}", style::Reset, "\x1b[K"));
317                    let old_lang = self.current_code_lang.clone();
318                    self.depth_stack.push(old_lang);
319                    self.code_block_depth += 1;
320                    self.current_code_lang = lang;
321                    self.code_history.clear();
322                    self.buffer.drain(..mat.get(0).unwrap().end());
323                    self.at_line_start = true;
324                    self.output_buffer
325                        .push_str(&format!("{}\x1b[K", self.get_block_bg()));
326                    continue;
327                }
328
329                if let Some(mat) = re_hr.find(&self.buffer.clone()) {
330                    let full_len = mat.end();
331                    self.apply_indentation();
332                    let bar = "─".repeat((self.term_width - self.line_pos) as usize);
333                    self.output_buffer.push_str(&format!(
334                        "{}{}{}{}\n",
335                        bg,
336                        color::Fg(color::AnsiValue(244)),
337                        bar,
338                        style::Reset
339                    ));
340                    self.buffer.drain(..full_len);
341                    self.line_pos = 0;
342                    self.at_line_start = true;
343                    continue;
344                }
345            }
346
347            if self.buffer.starts_with("**") || self.buffer.starts_with("__") {
348                self.toggle_style("bold");
349                self.buffer.drain(..2);
350                continue;
351            }
352            if self.buffer.starts_with('*') || self.buffer.starts_with('_') {
353                self.toggle_style("italic");
354                self.buffer.drain(..1);
355                continue;
356            }
357            if self.buffer.starts_with('`') {
358                self.toggle_style("code");
359                self.buffer.drain(..1);
360                continue;
361            }
362
363            let c = match self.buffer.chars().next() {
364                Some(val) => val,
365                None => break,
366            };
367            self.buffer.drain(..c.len_utf8());
368
369            match c {
370                '\\' => {
371                    if let Some(next) = self.buffer.chars().next() {
372                        self.buffer.drain(..next.len_utf8());
373                        self.word_buffer.push(next);
374                    }
375                }
376                ' ' | '\n' => {
377                    self.flush_word(&bg);
378                    if c == '\n' {
379                        self.output_buffer
380                            .push_str(&format!("{}\x1b[K\n{}", bg, style::Reset));
381                        self.line_pos = 0;
382                        self.at_line_start = true;
383                        let block_bg = self.get_block_bg();
384                        if !block_bg.is_empty() {
385                            self.output_buffer.push_str(&format!("{}\x1b[K", block_bg));
386                        }
387                        self.active_styles.clear();
388                    } else {
389                        self.output_buffer.push_str(&format!("{} ", bg));
390                        self.line_pos += 1;
391                    }
392                }
393                _ => {
394                    self.word_buffer.push(c);
395                    self.at_line_start = false;
396                }
397            }
398        }
399    }
400
401    /// Receives raw byte data. Accumulates bytes until a valid UTF-8 string is formed.
402    pub fn terminal_stream(&mut self, bytes: &[u8]) {
403        self.byte_cache.extend_from_slice(bytes);
404
405        match String::from_utf8(self.byte_cache.clone()) {
406            Ok(valid_string) => {
407                self.buffer.push_str(&valid_string);
408                self.byte_cache.clear();
409                self.process_buffer(false);
410            }
411            Err(e) => {
412                let valid_up_to = e.utf8_error().valid_up_to();
413                if valid_up_to > 0 {
414                    let valid_part =
415                        String::from_utf8_lossy(&self.byte_cache[..valid_up_to]).into_owned();
416                    self.buffer.push_str(&valid_part);
417                    self.byte_cache.drain(..valid_up_to);
418                    self.process_buffer(false);
419                }
420            }
421        }
422    }
423
424    pub fn finish(&mut self) {
425        if !self.byte_cache.is_empty() {
426            let leftover = String::from_utf8_lossy(&self.byte_cache).into_owned();
427            self.buffer.push_str(&leftover);
428            self.byte_cache.clear();
429        }
430        self.process_buffer(true);
431        self.output_buffer
432            .push_str(&format!("{}{}", style::Reset, "\x1b[K"));
433    }
434}
435
436/// PyO3 Wrapper for exposing the streamer to Python
437#[cfg_attr(feature = "python", pyclass)]
438pub struct MarkdownStreamer {
439    core: MarkdownStreamerCore,
440}
441
442#[cfg_attr(feature = "python", pymethods)]
443impl MarkdownStreamer {
444    #[cfg(feature = "python")]
445    #[new]
446    pub fn new() -> Self {
447        Self {
448            core: MarkdownStreamerCore::new(),
449        }
450    }
451    pub fn read_available(&mut self) -> String {
452        self.core.read_available()
453    }
454    pub fn terminal_stream(&mut self, data: &[u8]) {
455        self.core.terminal_stream(data);
456    }
457    pub fn finish(&mut self) {
458        self.core.finish();
459    }
460}
461
/// Python module entry point: registers the `MarkdownStreamer` class.
#[cfg(feature = "python")]
#[pymodule]
fn my_rust(_py: Python, m: &PyModule) -> PyResult<()> {
    let registered = m.add_class::<MarkdownStreamer>();
    registered
}