streamdown_core/state.rs
1//! Parse state for streaming markdown processing.
2//!
3//! The [`ParseState`] struct maintains all state needed to process
4//! streaming markdown input incrementally.
5
6use crate::enums::{BlockType, Code, EmitFlag, ListType};
7use regex::Regex;
8use serde::{Deserialize, Serialize};
9
/// ANSI escape sequence (`ESC[49m`) that resets the background color.
///
/// Serves as the default background: [`ParseState::new`] stores it as the
/// initial value of the `bg` field.
pub const BGRESET: &str = "\x1b[49m";
12
13/// Snapshot of current inline formatting states.
14///
15/// This is returned by [`ParseState::current()`] to capture
16/// the current inline formatting context.
17#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
18pub struct InlineState {
19 /// Whether inline code formatting is active
20 pub inline_code: bool,
21 /// Whether bold formatting is active
22 pub in_bold: bool,
23 /// Whether italic formatting is active
24 pub in_italic: bool,
25 /// Whether underline formatting is active
26 pub in_underline: bool,
27 /// Whether strikeout formatting is active
28 pub in_strikeout: bool,
29}
30
31/// Main parse state for streaming markdown processing.
32///
33/// This struct maintains all the state needed to incrementally parse
34/// markdown as it streams in. It tracks formatting states, list contexts,
35/// code blocks, tables, and various configuration options.
36///
37/// # Example
38///
39/// ```
40/// use streamdown_core::ParseState;
41///
42/// let mut state = ParseState::new();
43/// state.set_width(80);
44/// ```
45#[derive(Debug, Clone)]
46pub struct ParseState {
47 // === Input buffer state ===
48 /// Raw byte buffer for incomplete UTF-8 sequences
49 pub buffer: Vec<u8>,
50 /// Current line being processed
51 pub current_line: String,
52 /// Whether this is the first line of input
53 pub first_line: bool,
54 /// Whether the last line was empty
55 pub last_line_empty: bool,
56
57 // === Terminal/execution context ===
58 /// Whether input is from a PTY
59 pub is_pty: bool,
60 /// Whether in execution mode
61 pub is_exec: bool,
62 /// Whether we might be at a shell prompt
63 pub maybe_prompt: bool,
64 /// Compiled regex for prompt detection
65 pub prompt_regex: Option<Regex>,
66 /// Current emit flag for special output handling
67 pub emit_flag: Option<EmitFlag>,
68 /// Scrape buffer for content extraction
69 pub scrape: Option<String>,
70 /// Current index in scrape buffer
71 pub scrape_ix: usize,
72 /// Terminal reference (placeholder for terminal handle)
73 pub terminal: Option<String>,
74
75 // === Width configuration ===
76 /// User-specified width argument
77 pub width_arg: Option<usize>,
78 /// Full terminal width
79 pub width_full: Option<usize>,
80 /// Whether to wrap text
81 pub width_wrap: bool,
82
83 // === Indentation state ===
84 /// First line indentation level
85 pub first_indent: Option<usize>,
86 /// Whether current content has a newline
87 pub has_newline: bool,
88 /// Current background color code
89 pub bg: String,
90
91 // === Code block state ===
92 /// Buffer for code block content (with highlighting)
93 pub code_buffer: String,
94 /// Raw code buffer (without highlighting)
95 pub code_buffer_raw: String,
96 /// Generation counter for code blocks
97 pub code_gen: usize,
98 /// Language of current code block
99 pub code_language: Option<String>,
100 /// Whether on first line of code block
101 pub code_first_line: bool,
102 /// Indentation level of code block
103 pub code_indent: usize,
104 /// Current line in code block
105 pub code_line: String,
106
107 // === List state ===
108 /// Stack of ordered list numbers for nested lists
109 pub ordered_list_numbers: Vec<usize>,
110 /// Stack of (indent, type) for nested lists
111 pub list_item_stack: Vec<(usize, ListType)>,
112 /// Text indentation for list content
113 pub list_indent_text: usize,
114
115 // === Block/inline state flags ===
116 /// Whether currently in a list
117 pub in_list: bool,
118 /// Current code block type (None if not in code)
119 pub in_code: Option<Code>,
120 /// Whether inline code is active
121 pub inline_code: bool,
122 /// Whether bold formatting is active
123 pub in_bold: bool,
124 /// Whether italic formatting is active
125 pub in_italic: bool,
126 /// Current table state (None if not in table)
127 pub in_table: Option<Code>,
128 /// Whether underline formatting is active
129 pub in_underline: bool,
130 /// Whether strikeout formatting is active
131 pub in_strikeout: bool,
132 /// Current block quote depth
133 pub block_depth: usize,
134 /// Type of current block element
135 pub block_type: Option<BlockType>,
136
137 // === Execution state ===
138 /// Subprocess handle placeholder
139 pub exec_sub: Option<String>,
140 /// Master PTY fd placeholder
141 pub exec_master: Option<i32>,
142 /// Slave PTY fd placeholder
143 pub exec_slave: Option<i32>,
144 /// Keyboard input counter for exec mode
145 pub exec_kb: usize,
146
147 // === Exit/debug state ===
148 /// Exit code
149 pub exit: i32,
150 /// Debug: where input came from
151 pub where_from: Option<String>,
152
153 // === Feature flags from config ===
154 /// Enable link rendering
155 pub links: bool,
156 /// Enable image rendering
157 pub images: bool,
158 /// Enable space-indented code blocks
159 pub code_spaces: bool,
160 /// Enable clipboard integration
161 pub clipboard: bool,
162 /// Enable logging
163 pub logging: bool,
164 /// Timeout for streaming operations (seconds)
165 pub timeout: f64,
166 /// Save brace matching state
167 pub savebrace: bool,
168}
169
170impl Default for ParseState {
171 fn default() -> Self {
172 Self::new()
173 }
174}
175
176impl ParseState {
177 /// Create a new ParseState with default values.
178 ///
179 /// # Example
180 ///
181 /// ```
182 /// use streamdown_core::ParseState;
183 /// let state = ParseState::new();
184 /// assert!(state.first_line);
185 /// ```
186 pub fn new() -> Self {
187 Self {
188 // Input buffer state
189 buffer: Vec::new(),
190 current_line: String::new(),
191 first_line: true,
192 last_line_empty: true,
193
194 // Terminal/execution context
195 is_pty: false,
196 is_exec: false,
197 maybe_prompt: false,
198 prompt_regex: None,
199 emit_flag: None,
200 scrape: None,
201 scrape_ix: 0,
202 terminal: None,
203
204 // Width configuration
205 width_arg: None,
206 width_full: None,
207 width_wrap: false,
208
209 // Indentation state
210 first_indent: None,
211 has_newline: false,
212 bg: BGRESET.to_string(),
213
214 // Code block state
215 code_buffer: String::new(),
216 code_buffer_raw: String::new(),
217 code_gen: 0,
218 code_language: None,
219 code_first_line: false,
220 code_indent: 0,
221 code_line: String::new(),
222
223 // List state
224 ordered_list_numbers: Vec::new(),
225 list_item_stack: Vec::new(),
226 list_indent_text: 0,
227
228 // Block/inline state flags
229 in_list: false,
230 in_code: None,
231 inline_code: false,
232 in_bold: false,
233 in_italic: false,
234 in_table: None,
235 in_underline: false,
236 in_strikeout: false,
237 block_depth: 0,
238 block_type: None,
239
240 // Execution state
241 exec_sub: None,
242 exec_master: None,
243 exec_slave: None,
244 exec_kb: 0,
245
246 // Exit/debug state
247 exit: 0,
248 where_from: None,
249
250 // Feature flags from config
251 links: true,
252 images: true,
253 code_spaces: false,
254 clipboard: true,
255 logging: false,
256 timeout: 0.1,
257 savebrace: true,
258 }
259 }
260
261 /// Returns a snapshot of current inline formatting states.
262 ///
263 /// This captures whether bold, italic, underline, strikeout,
264 /// and inline code are currently active.
265 ///
266 /// # Example
267 ///
268 /// ```
269 /// use streamdown_core::ParseState;
270 /// let mut state = ParseState::new();
271 /// state.in_bold = true;
272 /// let inline = state.current();
273 /// assert!(inline.in_bold);
274 /// ```
275 pub fn current(&self) -> InlineState {
276 InlineState {
277 inline_code: self.inline_code,
278 in_bold: self.in_bold,
279 in_italic: self.in_italic,
280 in_underline: self.in_underline,
281 in_strikeout: self.in_strikeout,
282 }
283 }
284
285 /// Reset all inline formatting states to false.
286 ///
287 /// This is typically called when exiting a block that should
288 /// not carry inline formatting across boundaries.
289 ///
290 /// # Example
291 ///
292 /// ```
293 /// use streamdown_core::ParseState;
294 /// let mut state = ParseState::new();
295 /// state.in_bold = true;
296 /// state.in_italic = true;
297 /// state.reset_inline();
298 /// assert!(!state.in_bold);
299 /// assert!(!state.in_italic);
300 /// ```
301 pub fn reset_inline(&mut self) {
302 self.inline_code = false;
303 self.in_bold = false;
304 self.in_italic = false;
305 self.in_underline = false;
306 self.in_strikeout = false;
307 }
308
309 /// Set the terminal width.
310 ///
311 /// # Arguments
312 ///
313 /// * `width` - The terminal width in columns
314 pub fn set_width(&mut self, width: usize) {
315 self.width_full = Some(width);
316 }
317
318 /// Calculate the full available width with an optional offset.
319 ///
320 /// Returns the full terminal width minus the offset. If no width
321 /// is configured, returns a default of 80.
322 ///
323 /// # Arguments
324 ///
325 /// * `offset` - Number of columns to subtract from full width
326 ///
327 /// # Example
328 ///
329 /// ```
330 /// use streamdown_core::ParseState;
331 /// let mut state = ParseState::new();
332 /// state.set_width(100);
333 /// assert_eq!(state.full_width(10), 90);
334 /// ```
335 pub fn full_width(&self, offset: usize) -> usize {
336 let base = self.width_full.unwrap_or(80);
337 base.saturating_sub(offset)
338 }
339
340 /// Calculate the current usable width for content.
341 ///
342 /// This takes into account:
343 /// - Base terminal width
344 /// - Block quote depth (each level uses 2 columns)
345 /// - List indentation (if `listwidth` is true)
346 ///
347 /// # Arguments
348 ///
349 /// * `listwidth` - Whether to account for list indentation
350 ///
351 /// # Example
352 ///
353 /// ```
354 /// use streamdown_core::ParseState;
355 /// let mut state = ParseState::new();
356 /// state.set_width(80);
357 /// state.block_depth = 2;
358 /// assert_eq!(state.current_width(false), 76); // 80 - (2 * 2)
359 /// ```
360 pub fn current_width(&self, listwidth: bool) -> usize {
361 let base = self.width_full.unwrap_or(80);
362
363 // Subtract block quote indentation (2 chars per level)
364 let block_offset = self.block_depth * 2;
365
366 // Subtract list indentation if requested
367 let list_offset = if listwidth { self.list_indent_text } else { 0 };
368
369 base.saturating_sub(block_offset + list_offset)
370 }
371
372 /// Generate the left spacing/margin string for current context.
373 ///
374 /// This creates the appropriate leading whitespace and block quote
375 /// markers for the current nesting level.
376 ///
377 /// # Arguments
378 ///
379 /// * `listwidth` - Whether to include list indentation
380 ///
381 /// # Example
382 ///
383 /// ```
384 /// use streamdown_core::ParseState;
385 /// let mut state = ParseState::new();
386 /// state.block_depth = 1;
387 /// let margin = state.space_left(false);
388 /// assert_eq!(margin, "│ ");
389 /// ```
390 pub fn space_left(&self, listwidth: bool) -> String {
391 let mut result = String::new();
392
393 // Add block quote markers
394 for _ in 0..self.block_depth {
395 result.push_str("│ ");
396 }
397
398 // Add list indentation if requested
399 if listwidth && self.list_indent_text > 0 {
400 result.push_str(&" ".repeat(self.list_indent_text));
401 }
402
403 result
404 }
405
406 /// Check if currently inside any code block.
407 pub fn is_in_code(&self) -> bool {
408 self.in_code.is_some()
409 }
410
411 /// Check if currently inside a table.
412 pub fn is_in_table(&self) -> bool {
413 self.in_table.is_some()
414 }
415
416 /// Check if any inline formatting is active.
417 pub fn has_inline_formatting(&self) -> bool {
418 self.inline_code || self.in_bold || self.in_italic || self.in_underline || self.in_strikeout
419 }
420
421 /// Push a new list level onto the stack.
422 ///
423 /// # Arguments
424 ///
425 /// * `indent` - Indentation level of the list
426 /// * `list_type` - Type of list (Bullet or Ordered)
427 pub fn push_list(&mut self, indent: usize, list_type: ListType) {
428 self.list_item_stack.push((indent, list_type));
429 if list_type == ListType::Ordered {
430 self.ordered_list_numbers.push(1);
431 }
432 self.in_list = true;
433 }
434
435 /// Pop the current list level from the stack.
436 ///
437 /// Returns the popped (indent, type) tuple if the stack was non-empty.
438 pub fn pop_list(&mut self) -> Option<(usize, ListType)> {
439 let result = self.list_item_stack.pop();
440 if let Some((_, ListType::Ordered)) = result {
441 self.ordered_list_numbers.pop();
442 }
443 self.in_list = !self.list_item_stack.is_empty();
444 result
445 }
446
447 /// Get the current list depth (nesting level).
448 pub fn list_depth(&self) -> usize {
449 self.list_item_stack.len()
450 }
451
452 /// Get and increment the current ordered list number.
453 ///
454 /// Returns the current number before incrementing.
455 pub fn next_list_number(&mut self) -> Option<usize> {
456 self.ordered_list_numbers.last_mut().map(|n| {
457 let current = *n;
458 *n += 1;
459 current
460 })
461 }
462
463 /// Enter a code block.
464 ///
465 /// # Arguments
466 ///
467 /// * `code_type` - The type of code block (Backtick or Spaces)
468 /// * `language` - Optional language identifier
469 pub fn enter_code_block(&mut self, code_type: Code, language: Option<String>) {
470 self.in_code = Some(code_type);
471 self.code_language = language;
472 self.code_first_line = true;
473 self.code_buffer.clear();
474 self.code_buffer_raw.clear();
475 self.code_gen += 1;
476 }
477
478 /// Exit the current code block.
479 pub fn exit_code_block(&mut self) {
480 self.in_code = None;
481 self.code_language = None;
482 self.code_first_line = false;
483 }
484
485 /// Enter a block quote.
486 ///
487 /// # Arguments
488 ///
489 /// * `block_type` - The type of block (Quote or Think)
490 pub fn enter_block(&mut self, block_type: BlockType) {
491 self.block_depth += 1;
492 self.block_type = Some(block_type);
493 }
494
495 /// Exit one level of block quote.
496 pub fn exit_block(&mut self) {
497 if self.block_depth > 0 {
498 self.block_depth -= 1;
499 }
500 if self.block_depth == 0 {
501 self.block_type = None;
502 }
503 }
504}
505
/// Unit tests for [`ParseState`] and its helper methods.
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh state starts on the first line with nothing active.
    #[test]
    fn test_new_state() {
        let state = ParseState::new();
        assert!(state.first_line);
        assert!(state.last_line_empty);
        assert!(!state.in_bold);
        assert!(state.in_code.is_none());
        assert_eq!(state.block_depth, 0);
    }

    /// `current()` mirrors the inline flags that are set on the state.
    #[test]
    fn test_current_inline_state() {
        let mut state = ParseState::new();
        state.in_bold = true;
        state.in_italic = true;

        let inline = state.current();
        assert!(inline.in_bold);
        assert!(inline.in_italic);
        assert!(!inline.inline_code);
    }

    /// `reset_inline()` clears every inline flag that was set.
    #[test]
    fn test_reset_inline() {
        let mut state = ParseState::new();
        state.in_bold = true;
        state.in_italic = true;
        state.in_underline = true;

        state.reset_inline();

        assert!(!state.in_bold);
        assert!(!state.in_italic);
        assert!(!state.in_underline);
    }

    /// `full_width()` subtracts the offset and clamps at zero.
    #[test]
    fn test_full_width() {
        let mut state = ParseState::new();
        state.set_width(100);

        assert_eq!(state.full_width(0), 100);
        assert_eq!(state.full_width(20), 80);
        assert_eq!(state.full_width(150), 0); // saturating_sub
    }

    /// `current_width()` accounts for block depth and, on request, list indent.
    #[test]
    fn test_current_width_with_blocks() {
        let mut state = ParseState::new();
        state.set_width(80);

        assert_eq!(state.current_width(false), 80);

        state.block_depth = 2;
        assert_eq!(state.current_width(false), 76); // 80 - 4

        state.list_indent_text = 4;
        assert_eq!(state.current_width(true), 72); // 80 - 4 - 4
        assert_eq!(state.current_width(false), 76); // list indent not counted
    }

    /// `space_left()` emits one marker per block level plus the list indent.
    #[test]
    fn test_space_left() {
        let mut state = ParseState::new();

        assert_eq!(state.space_left(false), "");

        state.block_depth = 2;
        assert_eq!(state.space_left(false), "│ ".repeat(2));

        // Two block markers followed by exactly `list_indent_text` spaces.
        // Built with `repeat` so the trailing whitespace is explicit.
        state.list_indent_text = 3;
        assert_eq!(
            state.space_left(true),
            format!("{}{}", "│ ".repeat(2), " ".repeat(3))
        );
    }

    /// Pushing and popping lists keeps depth, numbering and `in_list` in sync.
    #[test]
    fn test_list_operations() {
        let mut state = ParseState::new();

        state.push_list(0, ListType::Ordered);
        assert!(state.in_list);
        assert_eq!(state.list_depth(), 1);
        assert_eq!(state.next_list_number(), Some(1));
        assert_eq!(state.next_list_number(), Some(2));

        state.push_list(2, ListType::Bullet);
        assert_eq!(state.list_depth(), 2);

        state.pop_list();
        assert_eq!(state.list_depth(), 1);
        assert!(state.in_list);

        state.pop_list();
        assert_eq!(state.list_depth(), 0);
        assert!(!state.in_list);
    }

    /// Entering and exiting a code block updates the code-related fields.
    #[test]
    fn test_code_block_operations() {
        let mut state = ParseState::new();

        assert!(!state.is_in_code());

        state.enter_code_block(Code::Backtick, Some("rust".to_string()));
        assert!(state.is_in_code());
        assert_eq!(state.code_language, Some("rust".to_string()));
        assert!(state.code_first_line);
        assert_eq!(state.code_gen, 1);

        state.exit_code_block();
        assert!(!state.is_in_code());
        assert!(state.code_language.is_none());
    }

    /// Block depth follows enter/exit calls; the type clears at depth zero.
    #[test]
    fn test_block_operations() {
        let mut state = ParseState::new();

        state.enter_block(BlockType::Quote);
        assert_eq!(state.block_depth, 1);
        assert_eq!(state.block_type, Some(BlockType::Quote));

        state.enter_block(BlockType::Quote);
        assert_eq!(state.block_depth, 2);

        state.exit_block();
        assert_eq!(state.block_depth, 1);

        state.exit_block();
        assert_eq!(state.block_depth, 0);
        assert!(state.block_type.is_none());
    }
}