Skip to main content

streamdown_core/
state.rs

1//! Parse state for streaming markdown processing.
2//!
3//! The [`ParseState`] struct maintains all state needed to process
4//! streaming markdown input incrementally.
5
6use crate::enums::{BlockType, Code, EmitFlag, ListType};
7use regex::Regex;
8use serde::{Deserialize, Serialize};
9
/// ANSI escape sequence (`ESC[49m`) that resets the background color.
///
/// [`ParseState::new`] uses this as the initial value of the `bg` field.
pub const BGRESET: &str = "\x1b[49m";
12
13/// Snapshot of current inline formatting states.
14///
15/// This is returned by [`ParseState::current()`] to capture
16/// the current inline formatting context.
17#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
18pub struct InlineState {
19    /// Whether inline code formatting is active
20    pub inline_code: bool,
21    /// Whether bold formatting is active
22    pub in_bold: bool,
23    /// Whether italic formatting is active
24    pub in_italic: bool,
25    /// Whether underline formatting is active
26    pub in_underline: bool,
27    /// Whether strikeout formatting is active
28    pub in_strikeout: bool,
29}
30
31/// Main parse state for streaming markdown processing.
32///
33/// This struct maintains all the state needed to incrementally parse
34/// markdown as it streams in. It tracks formatting states, list contexts,
35/// code blocks, tables, and various configuration options.
36///
37/// # Example
38///
39/// ```
40/// use streamdown_core::ParseState;
41///
42/// let mut state = ParseState::new();
43/// state.set_width(80);
44/// ```
45#[derive(Debug, Clone)]
46pub struct ParseState {
47    // === Input buffer state ===
48    /// Raw byte buffer for incomplete UTF-8 sequences
49    pub buffer: Vec<u8>,
50    /// Current line being processed
51    pub current_line: String,
52    /// Whether this is the first line of input
53    pub first_line: bool,
54    /// Whether the last line was empty
55    pub last_line_empty: bool,
56
57    // === Terminal/execution context ===
58    /// Whether input is from a PTY
59    pub is_pty: bool,
60    /// Whether in execution mode
61    pub is_exec: bool,
62    /// Whether we might be at a shell prompt
63    pub maybe_prompt: bool,
64    /// Compiled regex for prompt detection
65    pub prompt_regex: Option<Regex>,
66    /// Current emit flag for special output handling
67    pub emit_flag: Option<EmitFlag>,
68    /// Scrape buffer for content extraction
69    pub scrape: Option<String>,
70    /// Current index in scrape buffer
71    pub scrape_ix: usize,
72    /// Terminal reference (placeholder for terminal handle)
73    pub terminal: Option<String>,
74
75    // === Width configuration ===
76    /// User-specified width argument
77    pub width_arg: Option<usize>,
78    /// Full terminal width
79    pub width_full: Option<usize>,
80    /// Whether to wrap text
81    pub width_wrap: bool,
82
83    // === Indentation state ===
84    /// First line indentation level
85    pub first_indent: Option<usize>,
86    /// Whether current content has a newline
87    pub has_newline: bool,
88    /// Current background color code
89    pub bg: String,
90
91    // === Code block state ===
92    /// Buffer for code block content (with highlighting)
93    pub code_buffer: String,
94    /// Raw code buffer (without highlighting)
95    pub code_buffer_raw: String,
96    /// Generation counter for code blocks
97    pub code_gen: usize,
98    /// Language of current code block
99    pub code_language: Option<String>,
100    /// Whether on first line of code block
101    pub code_first_line: bool,
102    /// Indentation level of code block
103    pub code_indent: usize,
104    /// Current line in code block
105    pub code_line: String,
106
107    // === List state ===
108    /// Stack of ordered list numbers for nested lists
109    pub ordered_list_numbers: Vec<usize>,
110    /// Stack of (indent, type) for nested lists
111    pub list_item_stack: Vec<(usize, ListType)>,
112    /// Text indentation for list content
113    pub list_indent_text: usize,
114
115    // === Block/inline state flags ===
116    /// Whether currently in a list
117    pub in_list: bool,
118    /// Current code block type (None if not in code)
119    pub in_code: Option<Code>,
120    /// Whether inline code is active
121    pub inline_code: bool,
122    /// Whether bold formatting is active
123    pub in_bold: bool,
124    /// Whether italic formatting is active
125    pub in_italic: bool,
126    /// Current table state (None if not in table)
127    pub in_table: Option<Code>,
128    /// Whether underline formatting is active
129    pub in_underline: bool,
130    /// Whether strikeout formatting is active
131    pub in_strikeout: bool,
132    /// Current block quote depth
133    pub block_depth: usize,
134    /// Type of current block element
135    pub block_type: Option<BlockType>,
136
137    // === Execution state ===
138    /// Subprocess handle placeholder
139    pub exec_sub: Option<String>,
140    /// Master PTY fd placeholder
141    pub exec_master: Option<i32>,
142    /// Slave PTY fd placeholder
143    pub exec_slave: Option<i32>,
144    /// Keyboard input counter for exec mode
145    pub exec_kb: usize,
146
147    // === Exit/debug state ===
148    /// Exit code
149    pub exit: i32,
150    /// Debug: where input came from
151    pub where_from: Option<String>,
152
153    // === Feature flags from config ===
154    /// Enable link rendering
155    pub links: bool,
156    /// Enable image rendering
157    pub images: bool,
158    /// Enable space-indented code blocks
159    pub code_spaces: bool,
160    /// Enable clipboard integration
161    pub clipboard: bool,
162    /// Enable logging
163    pub logging: bool,
164    /// Timeout for streaming operations (seconds)
165    pub timeout: f64,
166    /// Save brace matching state
167    pub savebrace: bool,
168}
169
170impl Default for ParseState {
171    fn default() -> Self {
172        Self::new()
173    }
174}
175
176impl ParseState {
177    /// Create a new ParseState with default values.
178    ///
179    /// # Example
180    ///
181    /// ```
182    /// use streamdown_core::ParseState;
183    /// let state = ParseState::new();
184    /// assert!(state.first_line);
185    /// ```
186    pub fn new() -> Self {
187        Self {
188            // Input buffer state
189            buffer: Vec::new(),
190            current_line: String::new(),
191            first_line: true,
192            last_line_empty: true,
193
194            // Terminal/execution context
195            is_pty: false,
196            is_exec: false,
197            maybe_prompt: false,
198            prompt_regex: None,
199            emit_flag: None,
200            scrape: None,
201            scrape_ix: 0,
202            terminal: None,
203
204            // Width configuration
205            width_arg: None,
206            width_full: None,
207            width_wrap: false,
208
209            // Indentation state
210            first_indent: None,
211            has_newline: false,
212            bg: BGRESET.to_string(),
213
214            // Code block state
215            code_buffer: String::new(),
216            code_buffer_raw: String::new(),
217            code_gen: 0,
218            code_language: None,
219            code_first_line: false,
220            code_indent: 0,
221            code_line: String::new(),
222
223            // List state
224            ordered_list_numbers: Vec::new(),
225            list_item_stack: Vec::new(),
226            list_indent_text: 0,
227
228            // Block/inline state flags
229            in_list: false,
230            in_code: None,
231            inline_code: false,
232            in_bold: false,
233            in_italic: false,
234            in_table: None,
235            in_underline: false,
236            in_strikeout: false,
237            block_depth: 0,
238            block_type: None,
239
240            // Execution state
241            exec_sub: None,
242            exec_master: None,
243            exec_slave: None,
244            exec_kb: 0,
245
246            // Exit/debug state
247            exit: 0,
248            where_from: None,
249
250            // Feature flags from config
251            links: true,
252            images: true,
253            code_spaces: false,
254            clipboard: true,
255            logging: false,
256            timeout: 0.1,
257            savebrace: true,
258        }
259    }
260
261    /// Returns a snapshot of current inline formatting states.
262    ///
263    /// This captures whether bold, italic, underline, strikeout,
264    /// and inline code are currently active.
265    ///
266    /// # Example
267    ///
268    /// ```
269    /// use streamdown_core::ParseState;
270    /// let mut state = ParseState::new();
271    /// state.in_bold = true;
272    /// let inline = state.current();
273    /// assert!(inline.in_bold);
274    /// ```
275    pub fn current(&self) -> InlineState {
276        InlineState {
277            inline_code: self.inline_code,
278            in_bold: self.in_bold,
279            in_italic: self.in_italic,
280            in_underline: self.in_underline,
281            in_strikeout: self.in_strikeout,
282        }
283    }
284
285    /// Reset all inline formatting states to false.
286    ///
287    /// This is typically called when exiting a block that should
288    /// not carry inline formatting across boundaries.
289    ///
290    /// # Example
291    ///
292    /// ```
293    /// use streamdown_core::ParseState;
294    /// let mut state = ParseState::new();
295    /// state.in_bold = true;
296    /// state.in_italic = true;
297    /// state.reset_inline();
298    /// assert!(!state.in_bold);
299    /// assert!(!state.in_italic);
300    /// ```
301    pub fn reset_inline(&mut self) {
302        self.inline_code = false;
303        self.in_bold = false;
304        self.in_italic = false;
305        self.in_underline = false;
306        self.in_strikeout = false;
307    }
308
309    /// Set the terminal width.
310    ///
311    /// # Arguments
312    ///
313    /// * `width` - The terminal width in columns
314    pub fn set_width(&mut self, width: usize) {
315        self.width_full = Some(width);
316    }
317
318    /// Calculate the full available width with an optional offset.
319    ///
320    /// Returns the full terminal width minus the offset. If no width
321    /// is configured, returns a default of 80.
322    ///
323    /// # Arguments
324    ///
325    /// * `offset` - Number of columns to subtract from full width
326    ///
327    /// # Example
328    ///
329    /// ```
330    /// use streamdown_core::ParseState;
331    /// let mut state = ParseState::new();
332    /// state.set_width(100);
333    /// assert_eq!(state.full_width(10), 90);
334    /// ```
335    pub fn full_width(&self, offset: usize) -> usize {
336        let base = self.width_full.unwrap_or(80);
337        base.saturating_sub(offset)
338    }
339
340    /// Calculate the current usable width for content.
341    ///
342    /// This takes into account:
343    /// - Base terminal width
344    /// - Block quote depth (each level uses 2 columns)
345    /// - List indentation (if `listwidth` is true)
346    ///
347    /// # Arguments
348    ///
349    /// * `listwidth` - Whether to account for list indentation
350    ///
351    /// # Example
352    ///
353    /// ```
354    /// use streamdown_core::ParseState;
355    /// let mut state = ParseState::new();
356    /// state.set_width(80);
357    /// state.block_depth = 2;
358    /// assert_eq!(state.current_width(false), 76); // 80 - (2 * 2)
359    /// ```
360    pub fn current_width(&self, listwidth: bool) -> usize {
361        let base = self.width_full.unwrap_or(80);
362
363        // Subtract block quote indentation (2 chars per level)
364        let block_offset = self.block_depth * 2;
365
366        // Subtract list indentation if requested
367        let list_offset = if listwidth { self.list_indent_text } else { 0 };
368
369        base.saturating_sub(block_offset + list_offset)
370    }
371
372    /// Generate the left spacing/margin string for current context.
373    ///
374    /// This creates the appropriate leading whitespace and block quote
375    /// markers for the current nesting level.
376    ///
377    /// # Arguments
378    ///
379    /// * `listwidth` - Whether to include list indentation
380    ///
381    /// # Example
382    ///
383    /// ```
384    /// use streamdown_core::ParseState;
385    /// let mut state = ParseState::new();
386    /// state.block_depth = 1;
387    /// let margin = state.space_left(false);
388    /// assert_eq!(margin, "│ ");
389    /// ```
390    pub fn space_left(&self, listwidth: bool) -> String {
391        let mut result = String::new();
392
393        // Add block quote markers
394        for _ in 0..self.block_depth {
395            result.push_str("│ ");
396        }
397
398        // Add list indentation if requested
399        if listwidth && self.list_indent_text > 0 {
400            result.push_str(&" ".repeat(self.list_indent_text));
401        }
402
403        result
404    }
405
406    /// Check if currently inside any code block.
407    pub fn is_in_code(&self) -> bool {
408        self.in_code.is_some()
409    }
410
411    /// Check if currently inside a table.
412    pub fn is_in_table(&self) -> bool {
413        self.in_table.is_some()
414    }
415
416    /// Check if any inline formatting is active.
417    pub fn has_inline_formatting(&self) -> bool {
418        self.inline_code || self.in_bold || self.in_italic || self.in_underline || self.in_strikeout
419    }
420
421    /// Push a new list level onto the stack.
422    ///
423    /// # Arguments
424    ///
425    /// * `indent` - Indentation level of the list
426    /// * `list_type` - Type of list (Bullet or Ordered)
427    pub fn push_list(&mut self, indent: usize, list_type: ListType) {
428        self.list_item_stack.push((indent, list_type));
429        if list_type == ListType::Ordered {
430            self.ordered_list_numbers.push(1);
431        }
432        self.in_list = true;
433    }
434
435    /// Pop the current list level from the stack.
436    ///
437    /// Returns the popped (indent, type) tuple if the stack was non-empty.
438    pub fn pop_list(&mut self) -> Option<(usize, ListType)> {
439        let result = self.list_item_stack.pop();
440        if let Some((_, ListType::Ordered)) = result {
441            self.ordered_list_numbers.pop();
442        }
443        self.in_list = !self.list_item_stack.is_empty();
444        result
445    }
446
447    /// Get the current list depth (nesting level).
448    pub fn list_depth(&self) -> usize {
449        self.list_item_stack.len()
450    }
451
452    /// Get and increment the current ordered list number.
453    ///
454    /// Returns the current number before incrementing.
455    pub fn next_list_number(&mut self) -> Option<usize> {
456        self.ordered_list_numbers.last_mut().map(|n| {
457            let current = *n;
458            *n += 1;
459            current
460        })
461    }
462
463    /// Enter a code block.
464    ///
465    /// # Arguments
466    ///
467    /// * `code_type` - The type of code block (Backtick or Spaces)
468    /// * `language` - Optional language identifier
469    pub fn enter_code_block(&mut self, code_type: Code, language: Option<String>) {
470        self.in_code = Some(code_type);
471        self.code_language = language;
472        self.code_first_line = true;
473        self.code_buffer.clear();
474        self.code_buffer_raw.clear();
475        self.code_gen += 1;
476    }
477
478    /// Exit the current code block.
479    pub fn exit_code_block(&mut self) {
480        self.in_code = None;
481        self.code_language = None;
482        self.code_first_line = false;
483    }
484
485    /// Enter a block quote.
486    ///
487    /// # Arguments
488    ///
489    /// * `block_type` - The type of block (Quote or Think)
490    pub fn enter_block(&mut self, block_type: BlockType) {
491        self.block_depth += 1;
492        self.block_type = Some(block_type);
493    }
494
495    /// Exit one level of block quote.
496    pub fn exit_block(&mut self) {
497        if self.block_depth > 0 {
498            self.block_depth -= 1;
499        }
500        if self.block_depth == 0 {
501            self.block_type = None;
502        }
503    }
504}
505
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh state starts on the first line with nothing active.
    #[test]
    fn test_new_state() {
        let fresh = ParseState::new();

        assert!(fresh.first_line);
        assert!(fresh.last_line_empty);
        assert!(!fresh.in_bold);
        assert!(fresh.in_code.is_none());
        assert_eq!(fresh.block_depth, 0);
    }

    /// `current()` mirrors the inline flags set on the state.
    #[test]
    fn test_current_inline_state() {
        let mut state = ParseState::new();
        state.in_bold = true;
        state.in_italic = true;

        let snapshot = state.current();

        assert!(snapshot.in_bold);
        assert!(snapshot.in_italic);
        assert!(!snapshot.inline_code);
    }

    /// `reset_inline()` clears flags that were previously set.
    #[test]
    fn test_reset_inline() {
        let mut state = ParseState::new();
        state.in_bold = true;
        state.in_italic = true;
        state.in_underline = true;

        state.reset_inline();

        assert!(!state.in_bold);
        assert!(!state.in_italic);
        assert!(!state.in_underline);
    }

    /// `full_width()` subtracts the offset and clamps at zero.
    #[test]
    fn test_full_width() {
        let mut state = ParseState::new();
        state.set_width(100);

        assert_eq!(state.full_width(0), 100);
        assert_eq!(state.full_width(20), 80);
        // An offset wider than the terminal saturates to zero.
        assert_eq!(state.full_width(150), 0);
    }

    /// `current_width()` accounts for block depth and, on request, list indent.
    #[test]
    fn test_current_width_with_blocks() {
        let mut state = ParseState::new();
        state.set_width(80);
        assert_eq!(state.current_width(false), 80);

        // Two block levels cost 2 columns each.
        state.block_depth = 2;
        assert_eq!(state.current_width(false), 76);

        // List indent is only subtracted when asked for.
        state.list_indent_text = 4;
        assert_eq!(state.current_width(true), 72);
        assert_eq!(state.current_width(false), 76);
    }

    /// `space_left()` emits block markers followed by optional list padding.
    #[test]
    fn test_space_left() {
        let mut state = ParseState::new();
        assert_eq!(state.space_left(false), "");

        state.block_depth = 2;
        assert_eq!(state.space_left(false), "│ │ ");

        state.list_indent_text = 3;
        assert_eq!(state.space_left(true), "│ │    ");
    }

    /// Pushing and popping lists keeps depth, numbering and `in_list` in sync.
    #[test]
    fn test_list_operations() {
        let mut state = ParseState::new();

        // Outer ordered list: numbering starts at 1 and advances per call.
        state.push_list(0, ListType::Ordered);
        assert!(state.in_list);
        assert_eq!(state.list_depth(), 1);
        assert_eq!(state.next_list_number(), Some(1));
        assert_eq!(state.next_list_number(), Some(2));

        // Nested bullet list.
        state.push_list(2, ListType::Bullet);
        assert_eq!(state.list_depth(), 2);

        // Leaving the inner list keeps us inside the outer one.
        state.pop_list();
        assert_eq!(state.list_depth(), 1);
        assert!(state.in_list);

        // Leaving the outer list ends list mode.
        state.pop_list();
        assert_eq!(state.list_depth(), 0);
        assert!(!state.in_list);
    }

    /// Entering and exiting a code block updates the related fields.
    #[test]
    fn test_code_block_operations() {
        let mut state = ParseState::new();
        assert!(!state.is_in_code());

        state.enter_code_block(Code::Backtick, Some(String::from("rust")));
        assert!(state.is_in_code());
        assert_eq!(state.code_language.as_deref(), Some("rust"));
        assert!(state.code_first_line);
        assert_eq!(state.code_gen, 1);

        state.exit_code_block();
        assert!(!state.is_in_code());
        assert!(state.code_language.is_none());
    }

    /// Block depth tracks nested enters/exits; the type clears at depth zero.
    #[test]
    fn test_block_operations() {
        let mut state = ParseState::new();

        state.enter_block(BlockType::Quote);
        assert_eq!(state.block_depth, 1);
        assert_eq!(state.block_type, Some(BlockType::Quote));

        state.enter_block(BlockType::Quote);
        assert_eq!(state.block_depth, 2);

        state.exit_block();
        assert_eq!(state.block_depth, 1);

        state.exit_block();
        assert_eq!(state.block_depth, 0);
        assert!(state.block_type.is_none());
    }
}