Skip to main content

live_markdown/
lib.rs

#[cfg(feature = "python")]
use pyo3::prelude::*;

use regex::Regex;
use std::collections::HashSet;
use std::sync::OnceLock;
use std::thread;
use std::time::Duration;
use termion::{color, cursor, style};

// Syntax highlighting imports
use syntect::easy::HighlightLines;
use syntect::highlighting::{Style, ThemeSet};
use syntect::parsing::SyntaxSet;
use syntect::util::{LinesWithEndings, as_24_bit_terminal_escaped};
15
16/// Core engine for streaming Markdown to a terminal with live formatting.
17/// Handles word wrapping, syntax highlighting for code blocks, and nested block styles.
18pub struct MarkdownStreamerCore {
19    buffer: String,
20    output_buffer: String,
21    code_block_depth: usize,
22    depth_stack: Vec<String>,
23    current_code_lang: String,
24    current_code_line: String,
25    code_history: String,
26    at_line_start: bool,
27    active_styles: HashSet<String>,
28    word_buffer: String,
29    line_pos: u16,
30    term_width: u16,
31    ps: SyntaxSet,
32    ts: ThemeSet,
33    /// Delay between processing characters for a "smooth" typewriter effect
34    render_delay: Duration,
35    /// Minimum buffer size before starting the initial render
36    initial_buffer_threshold: usize,
37    /// Flag to track if the initial buffer has been cleared
38    initial_buffer_passed: bool,
39}
40
41impl MarkdownStreamerCore {
42    /// Background colors for nested code blocks (ANSI 256-color palette)
43    const BGS: &'static [&'static str] = &[
44        "\x1b[48;5;235m",
45        "\x1b[48;5;237m",
46        "\x1b[48;5;239m",
47        "\x1b[48;5;241m",
48    ];
49
50    pub fn new() -> Self {
51        let (width, _) = termion::terminal_size().unwrap_or((80, 24));
52        Self {
53            buffer: String::new(),
54            output_buffer: String::new(),
55            code_block_depth: 0,
56            depth_stack: Vec::new(),
57            current_code_lang: String::new(),
58            current_code_line: String::new(),
59            code_history: String::new(),
60            at_line_start: true,
61            active_styles: HashSet::new(),
62            word_buffer: String::new(),
63            line_pos: 0,
64            term_width: width,
65            ps: SyntaxSet::load_defaults_newlines(),
66            ts: ThemeSet::load_defaults(),
67            render_delay: Duration::from_millis(0),
68            initial_buffer_threshold: 20,
69            initial_buffer_passed: false,
70        }
71    }
72
73    /// Pulls processed terminal output from the internal buffer.
74    pub fn read_available(&mut self) -> String {
75        std::mem::take(&mut self.output_buffer)
76    }
77
78    fn get_block_bg(&self) -> String {
79        if self.code_block_depth == 0 {
80            return String::new();
81        }
82        let idx = (self.code_block_depth - 1).min(Self::BGS.len() - 1);
83        Self::BGS[idx].to_string()
84    }
85
86    fn apply_indentation(&mut self) {
87        if self.code_block_depth == 0 {
88            return;
89        }
90        let move_len = (self.code_block_depth as u16 - 1) * 4;
91        if move_len > 0 {
92            self.output_buffer
93                .push_str(&format!("{}", cursor::Right(move_len)));
94            self.line_pos += move_len;
95        }
96    }
97
98    /// Transforms raw code text into syntax-highlighted ANSI strings.
99    fn apply_highlighting(&self, text: &str, lang: &str) -> String {
100        let bg = self.get_block_bg();
101        if lang == "markdown" || (text.is_empty() && self.code_block_depth > 0) {
102            return format!("{}{}\x1b[K", bg, text);
103        }
104
105        let syntax = self
106            .ps
107            .find_syntax_by_token(lang)
108            .unwrap_or_else(|| self.ps.find_syntax_plain_text());
109        let mut h = HighlightLines::new(syntax, &self.ts.themes["base16-ocean.dark"]);
110
111        // We use code history to maintain highlighting state across incremental chunks
112        let mut full_content = self.code_history.clone();
113        full_content.push_str(text);
114
115        let mut result = String::new();
116        for line in LinesWithEndings::from(&full_content) {
117            let ranges: Vec<(Style, &str)> = h.highlight_line(line, &self.ps).unwrap();
118            result = as_24_bit_terminal_escaped(&ranges[..], false);
119        }
120
121        // Clean up backgrounds and resets to ensure block background consistency
122        let clean_bg_regex = Regex::new(r"\x1b\[48;[0-9;]*m").unwrap();
123        let stripped = clean_bg_regex.replace_all(&result, "");
124        let reset_regex = Regex::new(r"\x1b\[0?m").unwrap();
125        let no_resets = reset_regex.replace_all(&stripped, "");
126        let esc_regex = Regex::new(r"(\x1b\[[0-9;]*m)").unwrap();
127        let fixed_result = esc_regex.replace_all(&no_resets, format!("$1{}", bg));
128
129        format!("{}{}{}\x1b[K", bg, fixed_result, bg)
130    }
131
132    /// Handles terminal cursor movement to overwrite lines during live updates.
133    fn clear_and_move_up(&mut self, text: String) {
134        let re = Regex::new(r"\x1b\[[0-9;]*m").unwrap();
135        let stripped = re.replace_all(&text, "");
136        let total_len = stripped.len() as u16;
137
138        // Calculate visual line wrap count
139        let extra_rows = if total_len > 0 {
140            (self.line_pos + total_len - 1) / self.term_width
141        } else {
142            0
143        };
144
145        self.output_buffer.push('\r');
146        for _ in 0..extra_rows {
147            self.output_buffer.push_str("\x1b[K"); // Clear current line
148            self.output_buffer.push_str("\x1b[A"); // Move up
149        }
150
151        self.output_buffer.push_str("\x1b[K");
152        self.output_buffer.push('\r');
153
154        self.apply_indentation();
155    }
156
157    /// Flushes the current word buffer, handling word-wrap logic if the word exceeds terminal width.
158    fn flush_word(&mut self, prefix: &str) {
159        if self.word_buffer.is_empty() {
160            return;
161        }
162        let re = Regex::new(r"\x1b\[[0-9;]*m").unwrap();
163        let visible_word = re.replace_all(&self.word_buffer, "").into_owned();
164
165        if self.line_pos + visible_word.len() as u16 >= self.term_width - 1 {
166            let bg = self.get_block_bg();
167            self.output_buffer
168                .push_str(&format!("{}\x1b[K\n{}", bg, style::Reset));
169            self.line_pos = 0;
170            self.apply_indentation();
171            self.output_buffer.push_str(prefix);
172            if self.code_block_depth > 0 {
173                self.output_buffer.push_str("\x1b[K");
174            }
175        }
176        self.output_buffer.push_str(&self.word_buffer);
177        self.line_pos += visible_word.len() as u16;
178        self.word_buffer.clear();
179    }
180
181    fn toggle_style(&mut self, style_key: &str) {
182        let bg = self.get_block_bg();
183        if self.active_styles.contains(style_key) {
184            self.active_styles.remove(style_key);
185        } else {
186            self.active_styles.insert(style_key.to_string());
187        }
188        self.word_buffer.push_str(&format!("{}", style::Reset));
189        self.word_buffer.push_str(&bg);
190        for s in &self.active_styles {
191            match s.as_str() {
192                "bold" => self.word_buffer.push_str(&format!("{}", style::Bold)),
193                "italic" => self.word_buffer.push_str(&format!("{}", style::Italic)),
194                "code" => {
195                    self.word_buffer
196                        .push_str(&format!("{}", color::Fg(color::Yellow)));
197                    self.word_buffer.push_str("\x1b[48;5;238m");
198                }
199                _ => {}
200            }
201        }
202    }
203
204    /// Main loop for parsing the incoming Markdown stream.
205    pub fn process_buffer(&mut self, final_call: bool) {
206        let re_close = Regex::new(r"^[ \t]*```[ \t]*\n?").unwrap();
207        let re_open = Regex::new(r"^[ \t]*```([a-zA-Z0-9\-\+#]+)[ \t]*\n?").unwrap();
208        let re_hr = Regex::new(r"^[ \t]*(\-{3,}|\*{3,}|\_{3,})[ \t]*\n?").unwrap();
209
210        // If we haven't passed the initial threshold, wait unless it's final
211        if !final_call && !self.initial_buffer_passed {
212            if self.buffer.len() < self.initial_buffer_threshold {
213                return;
214            } else {
215                self.initial_buffer_passed = true;
216            }
217        }
218
219        while !self.buffer.is_empty() || (final_call && !self.word_buffer.is_empty()) {
220            if final_call && self.buffer.is_empty() && !self.word_buffer.is_empty() {
221                let bg = self.get_block_bg();
222                self.flush_word(&bg);
223                if self.buffer.is_empty() {
224                    break;
225                }
226            }
227
228            // To safely detect markers like ```, we need a small lookahead buffer.
229            // We check if the remaining buffer is too small to contain a full marker
230            // unless we are finishing up.
231            if !final_call && self.buffer.len() < 10 {
232                // If we are at line start, we must wait to ensure we don't miss a block marker
233                if self.at_line_start
234                    || self.buffer.contains('`')
235                    || self.buffer.contains('*')
236                    || self.buffer.contains('_')
237                {
238                    break;
239                }
240            }
241
242            // Artificial delay for smooth printing effect
243            if !self.render_delay.is_zero() {
244                thread::sleep(self.render_delay);
245            }
246
247            let bg = self.get_block_bg();
248
249            // Handling inside Code Blocks
250            if self.code_block_depth > 0 && self.current_code_lang != "markdown" {
251                let close_match = re_close.find(&self.buffer).map(|m| m.end());
252                if let Some(end_idx) = close_match {
253                    if self.at_line_start {
254                        if !self.current_code_line.is_empty() {
255                            let line_content = self.current_code_line.clone();
256                            let lang = self.current_code_lang.clone();
257                            let line_hl = self.apply_highlighting(&line_content, &lang);
258                            self.clear_and_move_up(line_content);
259                            self.output_buffer.push_str(&line_hl);
260                        }
261                        self.output_buffer
262                            .push_str(&format!("{}{}", style::Reset, "\x1b[K"));
263                        self.code_block_depth -= 1;
264                        self.current_code_lang = self.depth_stack.pop().unwrap_or_default();
265                        self.current_code_line.clear();
266                        self.code_history.clear();
267                        self.buffer.drain(..end_idx);
268                        if self.code_block_depth > 0 {
269                            self.output_buffer
270                                .push_str(&format!("{}\x1b[K", self.get_block_bg()));
271                        }
272                        self.at_line_start = true;
273                        self.line_pos = 0;
274                        continue;
275                    }
276                }
277                let c = self.buffer.remove(0);
278                if c == '\n' {
279                    let line_content = self.current_code_line.clone();
280                    let lang = self.current_code_lang.clone();
281                    let line_hl = self.apply_highlighting(&line_content, &lang);
282                    self.clear_and_move_up(line_content);
283                    self.output_buffer.push_str(&line_hl);
284                    self.output_buffer.push_str(&format!("{}\n", style::Reset));
285                    self.code_history.push_str(&self.current_code_line);
286                    self.code_history.push('\n');
287                    self.current_code_line.clear();
288                    self.at_line_start = true;
289                    self.line_pos = 0;
290                    self.output_buffer
291                        .push_str(&format!("{}\x1b[K", self.get_block_bg()));
292                } else {
293                    let line_content_before = self.current_code_line.clone();
294                    self.clear_and_move_up(line_content_before);
295                    self.current_code_line.push(c);
296                    let line_content_after = self.current_code_line.clone();
297                    let lang = self.current_code_lang.clone();
298                    let new_hl = self.apply_highlighting(&line_content_after, &lang);
299                    self.output_buffer.push_str(&new_hl);
300                    self.at_line_start = false;
301                }
302                continue;
303            }
304
305            // Handling Standard Markdown
306            if self.at_line_start {
307                if let Some(mat) = re_open.captures(&self.buffer.clone()) {
308                    let lang = mat.get(1).unwrap().as_str().to_lowercase();
309                    self.output_buffer
310                        .push_str(&format!("{}{}", style::Reset, "\x1b[K"));
311                    let old_lang = self.current_code_lang.clone();
312                    self.depth_stack.push(old_lang);
313                    self.code_block_depth += 1;
314                    self.current_code_lang = lang;
315                    self.code_history.clear();
316                    self.buffer.drain(..mat.get(0).unwrap().end());
317                    self.at_line_start = true;
318                    self.output_buffer
319                        .push_str(&format!("{}\x1b[K", self.get_block_bg()));
320                    continue;
321                }
322
323                if let Some(mat) = re_hr.find(&self.buffer.clone()) {
324                    let full_len = mat.end();
325                    self.apply_indentation();
326                    let bar = "─".repeat((self.term_width - self.line_pos) as usize);
327                    self.output_buffer.push_str(&format!(
328                        "{}{}{}{}\n",
329                        bg,
330                        color::Fg(color::AnsiValue(244)),
331                        bar,
332                        style::Reset
333                    ));
334                    self.buffer.drain(..full_len);
335                    self.line_pos = 0;
336                    self.at_line_start = true;
337                    continue;
338                }
339            }
340
341            // Syntax Markers (Bold/Italic/Inline Code)
342            if self.buffer.starts_with("**") || self.buffer.starts_with("__") {
343                self.toggle_style("bold");
344                self.buffer.drain(..2);
345                continue;
346            }
347            if self.buffer.starts_with('*') || self.buffer.starts_with('_') {
348                self.toggle_style("italic");
349                self.buffer.drain(..1);
350                continue;
351            }
352            if self.buffer.starts_with('`') {
353                self.toggle_style("code");
354                self.buffer.drain(..1);
355                continue;
356            }
357
358            let c = self.buffer.remove(0);
359            match c {
360                '\\' => {
361                    if !self.buffer.is_empty() {
362                        let next = self.buffer.remove(0);
363                        self.word_buffer.push(next);
364                    }
365                }
366                ' ' | '\n' => {
367                    self.flush_word(&bg);
368                    if c == '\n' {
369                        self.output_buffer
370                            .push_str(&format!("{}\x1b[K\n{}", bg, style::Reset));
371                        self.line_pos = 0;
372                        self.at_line_start = true;
373                        let block_bg = self.get_block_bg();
374                        if !block_bg.is_empty() {
375                            self.output_buffer.push_str(&format!("{}\x1b[K", block_bg));
376                        }
377                        self.active_styles.clear();
378                    } else {
379                        self.output_buffer.push_str(&format!("{} ", bg));
380                        self.line_pos += 1;
381                    }
382                }
383                _ => {
384                    self.word_buffer.push(c);
385                    self.at_line_start = false;
386                }
387            }
388        }
389    }
390
391    pub fn terminal_stream(&mut self, text: &str) {
392        self.buffer.push_str(text);
393        self.process_buffer(false);
394    }
395
396    pub fn finish(&mut self) {
397        self.process_buffer(true);
398        self.output_buffer
399            .push_str(&format!("{}{}", style::Reset, "\x1b[K"));
400    }
401}
402
403/// PyO3 Wrapper for exposing the streamer to Python
404#[cfg_attr(feature = "python", pyclass)]
405pub struct MarkdownStreamer {
406    core: MarkdownStreamerCore,
407}
408
409#[cfg_attr(feature = "python", pymethods)]
410impl MarkdownStreamer {
411    #[cfg(feature = "python")]
412    #[new]
413    pub fn new() -> Self {
414        Self {
415            core: MarkdownStreamerCore::new(),
416        }
417    }
418    pub fn read_available(&mut self) -> String {
419        self.core.read_available()
420    }
421    pub fn terminal_stream(&mut self, text: &str) {
422        self.core.terminal_stream(text);
423    }
424    pub fn finish(&mut self) {
425        self.core.finish();
426    }
427}
428
/// Python module initializer: registers the `MarkdownStreamer` class on module `m`.
#[cfg(feature = "python")]
#[pymodule]
fn my_rust(_py: Python, m: &PyModule) -> PyResult<()> {
    // `add_class` already yields `PyResult<()>`, so its result is the module result.
    m.add_class::<MarkdownStreamer>()
}