1#[cfg(feature = "python")]
2use pyo3::prelude::*;
3
4use regex::Regex;
5use std::collections::HashSet;
6use std::thread;
7use std::time::Duration;
8use termion::{color, cursor, style};
9
10use syntect::easy::HighlightLines;
12use syntect::highlighting::{Style, ThemeSet};
13use syntect::parsing::SyntaxSet;
14use syntect::util::{LinesWithEndings, as_24_bit_terminal_escaped};
15
16pub struct MarkdownStreamerCore {
19 buffer: String,
20 byte_cache: Vec<u8>,
22 output_buffer: String,
23 code_block_depth: usize,
24 depth_stack: Vec<String>,
25 current_code_lang: String,
26 current_code_line: String,
27 code_history: String,
28 at_line_start: bool,
29 active_styles: HashSet<String>,
30 word_buffer: String,
31 line_pos: u16,
32 term_width: u16,
33 ps: SyntaxSet,
34 ts: ThemeSet,
35 render_delay: Duration,
37 initial_buffer_threshold: usize,
39 initial_buffer_passed: bool,
41}
42
43impl MarkdownStreamerCore {
/// Background colours (256-colour SGR sequences) for fenced blocks, indexed
/// by nesting level: the first entry is used for the outermost block and
/// each further level gets the next, lighter grey.
const BGS: &'static [&'static str] = &[
    "\x1b[48;5;235m", // nesting level 1
    "\x1b[48;5;237m", // nesting level 2
    "\x1b[48;5;239m", // nesting level 3
    "\x1b[48;5;241m", // nesting level 4 and deeper
];
51
52 pub fn new() -> Self {
53 let (width, _) = termion::terminal_size().unwrap_or((80, 24));
54 Self {
55 buffer: String::new(),
56 byte_cache: Vec::new(),
57 output_buffer: String::new(),
58 code_block_depth: 0,
59 depth_stack: Vec::new(),
60 current_code_lang: String::new(),
61 current_code_line: String::new(),
62 code_history: String::new(),
63 at_line_start: true,
64 active_styles: HashSet::new(),
65 word_buffer: String::new(),
66 line_pos: 0,
67 term_width: width,
68 ps: SyntaxSet::load_defaults_newlines(),
69 ts: ThemeSet::load_defaults(),
70 render_delay: Duration::from_millis(0),
71 initial_buffer_threshold: 20,
72 initial_buffer_passed: false,
73 }
74 }
75
76 pub fn read_available(&mut self) -> String {
78 std::mem::take(&mut self.output_buffer)
79 }
80
81 fn get_block_bg(&self) -> String {
82 if self.code_block_depth == 0 {
83 return String::new();
84 }
85 let idx = (self.code_block_depth - 1).min(Self::BGS.len() - 1);
86 Self::BGS[idx].to_string()
87 }
88
89 fn apply_indentation(&mut self) {
90 if self.code_block_depth == 0 {
91 return;
92 }
93 let move_len = (self.code_block_depth as u16 - 1) * 4;
94 if move_len > 0 {
95 self.output_buffer
96 .push_str(&format!("{}", cursor::Right(move_len)));
97 self.line_pos += move_len;
98 }
99 }
100
101 fn apply_highlighting(&self, text: &str, lang: &str) -> String {
103 let bg = self.get_block_bg();
104 if lang == "markdown" || (text.is_empty() && self.code_block_depth > 0) {
105 return format!("{}{}\x1b[K", bg, text);
106 }
107
108 let syntax = self
109 .ps
110 .find_syntax_by_token(lang)
111 .unwrap_or_else(|| self.ps.find_syntax_plain_text());
112 let mut h = HighlightLines::new(syntax, &self.ts.themes["base16-ocean.dark"]);
113
114 let mut full_content = self.code_history.clone();
116 full_content.push_str(text);
117
118 let mut result = String::new();
119 for line in LinesWithEndings::from(&full_content) {
120 let ranges: Vec<(Style, &str)> = h.highlight_line(line, &self.ps).unwrap();
121 result = as_24_bit_terminal_escaped(&ranges[..], false);
122 }
123
124 let clean_bg_regex = Regex::new(r"\x1b\[48;[0-9;]*m").unwrap();
126 let stripped = clean_bg_regex.replace_all(&result, "");
127 let reset_regex = Regex::new(r"\x1b\[0?m").unwrap();
128 let no_resets = reset_regex.replace_all(&stripped, "");
129 let esc_regex = Regex::new(r"(\x1b\[[0-9;]*m)").unwrap();
130 let fixed_result = esc_regex.replace_all(&no_resets, format!("$1{}", bg));
131
132 format!("{}{}{}\x1b[K", bg, fixed_result, bg)
133 }
134
135 fn clear_and_move_up(&mut self, text: String) {
137 let re = Regex::new(r"\x1b\[[0-9;]*m").unwrap();
138 let stripped = re.replace_all(&text, "");
139 let total_len = stripped.chars().count() as u16;
140
141 let extra_rows = if total_len > 0 {
143 (self.line_pos + total_len - 1) / self.term_width
144 } else {
145 0
146 };
147
148 self.output_buffer.push('\r');
149 for _ in 0..extra_rows {
150 self.output_buffer.push_str("\x1b[K"); self.output_buffer.push_str("\x1b[A"); }
153
154 self.output_buffer.push_str("\x1b[K");
155 self.output_buffer.push('\r');
156
157 self.apply_indentation();
158 }
159
160 fn flush_word(&mut self, prefix: &str) {
162 if self.word_buffer.is_empty() {
163 return;
164 }
165 let re = Regex::new(r"\x1b\[[0-9;]*m").unwrap();
166 let visible_word = re.replace_all(&self.word_buffer, "").into_owned();
167 let visible_len = visible_word.chars().count() as u16;
168
169 if self.line_pos + visible_len >= self.term_width - 1 {
170 let bg = self.get_block_bg();
171 self.output_buffer
172 .push_str(&format!("{}\x1b[K\n{}", bg, style::Reset));
173 self.line_pos = 0;
174 self.apply_indentation();
175 self.output_buffer.push_str(prefix);
176 if self.code_block_depth > 0 {
177 self.output_buffer.push_str("\x1b[K");
178 }
179 }
180 self.output_buffer.push_str(&self.word_buffer);
181 self.line_pos += visible_len;
182 self.word_buffer.clear();
183 }
184
185 fn toggle_style(&mut self, style_key: &str) {
186 let bg = self.get_block_bg();
187 if self.active_styles.contains(style_key) {
188 self.active_styles.remove(style_key);
189 } else {
190 self.active_styles.insert(style_key.to_string());
191 }
192 self.word_buffer.push_str(&format!("{}", style::Reset));
193 self.word_buffer.push_str(&bg);
194 for s in &self.active_styles {
195 match s.as_str() {
196 "bold" => self.word_buffer.push_str(&format!("{}", style::Bold)),
197 "italic" => self.word_buffer.push_str(&format!("{}", style::Italic)),
198 "code" => {
199 self.word_buffer
200 .push_str(&format!("{}", color::Fg(color::Yellow)));
201 self.word_buffer.push_str("\x1b[48;5;238m");
202 }
203 _ => {}
204 }
205 }
206 }
207
208 pub fn process_buffer(&mut self, final_call: bool) {
210 let re_close = Regex::new(r"^[ \t]*```[ \t]*\n?").unwrap();
211 let re_open = Regex::new(r"^[ \t]*```([a-zA-Z0-9\-\+#]+)[ \t]*\n?").unwrap();
212 let re_hr = Regex::new(r"^[ \t]*(\-{3,}|\*{3,}|\_{3,})[ \t]*\n?").unwrap();
213
214 if !final_call && !self.initial_buffer_passed {
216 if self.buffer.chars().count() < self.initial_buffer_threshold {
217 return;
218 } else {
219 self.initial_buffer_passed = true;
220 }
221 }
222
223 while !self.buffer.is_empty() || (final_call && !self.word_buffer.is_empty()) {
224 if final_call && self.buffer.is_empty() && !self.word_buffer.is_empty() {
225 let bg = self.get_block_bg();
226 self.flush_word(&bg);
227 if self.buffer.is_empty() {
228 break;
229 }
230 }
231
232 if !final_call && self.buffer.chars().count() < 15 {
234 if self.at_line_start
235 || self.buffer.contains('`')
236 || self.buffer.contains('*')
237 || self.buffer.contains('_')
238 {
239 break;
240 }
241 }
242
243 if !self.render_delay.is_zero() {
244 thread::sleep(self.render_delay);
245 }
246
247 let bg = self.get_block_bg();
248
249 if self.code_block_depth > 0 && self.current_code_lang != "markdown" {
251 let close_match = re_close.find(&self.buffer).map(|m| m.end());
252 if let Some(end_idx) = close_match {
253 if self.at_line_start {
254 if !self.current_code_line.is_empty() {
255 let line_content = self.current_code_line.clone();
256 let lang = self.current_code_lang.clone();
257 let line_hl = self.apply_highlighting(&line_content, &lang);
258 self.clear_and_move_up(line_content);
259 self.output_buffer.push_str(&line_hl);
260 }
261 self.output_buffer
262 .push_str(&format!("{}{}", style::Reset, "\x1b[K"));
263 self.code_block_depth -= 1;
264 self.current_code_lang = self.depth_stack.pop().unwrap_or_default();
265 self.current_code_line.clear();
266 self.code_history.clear();
267 self.buffer.drain(..end_idx);
268 if self.code_block_depth > 0 {
269 self.output_buffer
270 .push_str(&format!("{}\x1b[K", self.get_block_bg()));
271 }
272 self.at_line_start = true;
273 self.line_pos = 0;
274 continue;
275 }
276 }
277
278 let c = match self.buffer.chars().next() {
279 Some(val) => val,
280 None => break,
281 };
282 self.buffer.drain(..c.len_utf8());
283
284 if c == '\n' {
285 let line_content = self.current_code_line.clone();
286 let lang = self.current_code_lang.clone();
287 let line_hl = self.apply_highlighting(&line_content, &lang);
288 self.clear_and_move_up(line_content);
289 self.output_buffer.push_str(&line_hl);
290 self.output_buffer.push_str(&format!("{}\n", style::Reset));
291 self.code_history.push_str(&self.current_code_line);
292 self.code_history.push('\n');
293 self.current_code_line.clear();
294 self.at_line_start = true;
295 self.line_pos = 0;
296 self.output_buffer
297 .push_str(&format!("{}\x1b[K", self.get_block_bg()));
298 } else {
299 let line_content_before = self.current_code_line.clone();
300 self.clear_and_move_up(line_content_before);
301 self.current_code_line.push(c);
302 let line_content_after = self.current_code_line.clone();
303 let lang = self.current_code_lang.clone();
304 let new_hl = self.apply_highlighting(&line_content_after, &lang);
305 self.output_buffer.push_str(&new_hl);
306 self.at_line_start = false;
307 }
308 continue;
309 }
310
311 if self.at_line_start {
313 if let Some(mat) = re_open.captures(&self.buffer.clone()) {
314 let lang = mat.get(1).unwrap().as_str().to_lowercase();
315 self.output_buffer
316 .push_str(&format!("{}{}", style::Reset, "\x1b[K"));
317 let old_lang = self.current_code_lang.clone();
318 self.depth_stack.push(old_lang);
319 self.code_block_depth += 1;
320 self.current_code_lang = lang;
321 self.code_history.clear();
322 self.buffer.drain(..mat.get(0).unwrap().end());
323 self.at_line_start = true;
324 self.output_buffer
325 .push_str(&format!("{}\x1b[K", self.get_block_bg()));
326 continue;
327 }
328
329 if let Some(mat) = re_hr.find(&self.buffer.clone()) {
330 let full_len = mat.end();
331 self.apply_indentation();
332 let bar = "─".repeat((self.term_width - self.line_pos) as usize);
333 self.output_buffer.push_str(&format!(
334 "{}{}{}{}\n",
335 bg,
336 color::Fg(color::AnsiValue(244)),
337 bar,
338 style::Reset
339 ));
340 self.buffer.drain(..full_len);
341 self.line_pos = 0;
342 self.at_line_start = true;
343 continue;
344 }
345 }
346
347 if self.buffer.starts_with("**") || self.buffer.starts_with("__") {
348 self.toggle_style("bold");
349 self.buffer.drain(..2);
350 continue;
351 }
352 if self.buffer.starts_with('*') || self.buffer.starts_with('_') {
353 self.toggle_style("italic");
354 self.buffer.drain(..1);
355 continue;
356 }
357 if self.buffer.starts_with('`') {
358 self.toggle_style("code");
359 self.buffer.drain(..1);
360 continue;
361 }
362
363 let c = match self.buffer.chars().next() {
364 Some(val) => val,
365 None => break,
366 };
367 self.buffer.drain(..c.len_utf8());
368
369 match c {
370 '\\' => {
371 if let Some(next) = self.buffer.chars().next() {
372 self.buffer.drain(..next.len_utf8());
373 self.word_buffer.push(next);
374 }
375 }
376 ' ' | '\n' => {
377 self.flush_word(&bg);
378 if c == '\n' {
379 self.output_buffer
380 .push_str(&format!("{}\x1b[K\n{}", bg, style::Reset));
381 self.line_pos = 0;
382 self.at_line_start = true;
383 let block_bg = self.get_block_bg();
384 if !block_bg.is_empty() {
385 self.output_buffer.push_str(&format!("{}\x1b[K", block_bg));
386 }
387 self.active_styles.clear();
388 } else {
389 self.output_buffer.push_str(&format!("{} ", bg));
390 self.line_pos += 1;
391 }
392 }
393 _ => {
394 self.word_buffer.push(c);
395 self.at_line_start = false;
396 }
397 }
398 }
399 }
400
401 pub fn terminal_stream(&mut self, bytes: &[u8]) {
403 self.byte_cache.extend_from_slice(bytes);
404
405 match String::from_utf8(self.byte_cache.clone()) {
406 Ok(valid_string) => {
407 self.buffer.push_str(&valid_string);
408 self.byte_cache.clear();
409 self.process_buffer(false);
410 }
411 Err(e) => {
412 let valid_up_to = e.utf8_error().valid_up_to();
413 if valid_up_to > 0 {
414 let valid_part =
415 String::from_utf8_lossy(&self.byte_cache[..valid_up_to]).into_owned();
416 self.buffer.push_str(&valid_part);
417 self.byte_cache.drain(..valid_up_to);
418 self.process_buffer(false);
419 }
420 }
421 }
422 }
423
424 pub fn finish(&mut self) {
425 if !self.byte_cache.is_empty() {
426 let leftover = String::from_utf8_lossy(&self.byte_cache).into_owned();
427 self.buffer.push_str(&leftover);
428 self.byte_cache.clear();
429 }
430 self.process_buffer(true);
431 self.output_buffer
432 .push_str(&format!("{}{}", style::Reset, "\x1b[K"));
433 }
434}
435
436#[cfg_attr(feature = "python", pyclass)]
438pub struct MarkdownStreamer {
439 core: MarkdownStreamerCore,
440}
441
442#[cfg_attr(feature = "python", pymethods)]
443impl MarkdownStreamer {
444 #[cfg(feature = "python")]
445 #[new]
446 pub fn new() -> Self {
447 Self {
448 core: MarkdownStreamerCore::new(),
449 }
450 }
451 pub fn read_available(&mut self) -> String {
452 self.core.read_available()
453 }
454 pub fn terminal_stream(&mut self, data: &[u8]) {
455 self.core.terminal_stream(data);
456 }
457 pub fn finish(&mut self) {
458 self.core.finish();
459 }
460}
461
/// Python module definition: registers the `MarkdownStreamer` class.
#[cfg(feature = "python")]
#[pymodule]
fn my_rust(_py: Python, m: &PyModule) -> PyResult<()> {
    // `add_class` already yields the `PyResult<()>` this function returns.
    m.add_class::<MarkdownStreamer>()
}