streamdown_core/
state.rs

//! Parse state for streaming markdown processing.
//!
//! The [`ParseState`] struct maintains all the state needed to process
//! streaming markdown input incrementally.
5
6use crate::enums::{BlockType, Code, EmitFlag, ListType};
7use regex::Regex;
8use serde::{Deserialize, Serialize};
9
/// ANSI SGR escape sequence (`ESC [ 49 m`) that restores the terminal's
/// default background color.
///
/// [`ParseState::new`] uses it as the initial value of the `bg` field.
pub const BGRESET: &str = "\x1b[49m";
12
13/// Snapshot of current inline formatting states.
14///
15/// This is returned by [`ParseState::current()`] to capture
16/// the current inline formatting context.
17#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
18pub struct InlineState {
19    /// Whether inline code formatting is active
20    pub inline_code: bool,
21    /// Whether bold formatting is active
22    pub in_bold: bool,
23    /// Whether italic formatting is active
24    pub in_italic: bool,
25    /// Whether underline formatting is active
26    pub in_underline: bool,
27    /// Whether strikeout formatting is active
28    pub in_strikeout: bool,
29}
30
31/// Main parse state for streaming markdown processing.
32///
33/// This struct maintains all the state needed to incrementally parse
34/// markdown as it streams in. It tracks formatting states, list contexts,
35/// code blocks, tables, and various configuration options.
36///
37/// # Example
38///
39/// ```
40/// use streamdown_core::ParseState;
41///
42/// let mut state = ParseState::new();
43/// state.set_width(80);
44/// ```
45#[derive(Debug, Clone)]
46pub struct ParseState {
47    // === Input buffer state ===
48    /// Raw byte buffer for incomplete UTF-8 sequences
49    pub buffer: Vec<u8>,
50    /// Current line being processed
51    pub current_line: String,
52    /// Whether this is the first line of input
53    pub first_line: bool,
54    /// Whether the last line was empty
55    pub last_line_empty: bool,
56
57    // === Terminal/execution context ===
58    /// Whether input is from a PTY
59    pub is_pty: bool,
60    /// Whether in execution mode
61    pub is_exec: bool,
62    /// Whether we might be at a shell prompt
63    pub maybe_prompt: bool,
64    /// Compiled regex for prompt detection
65    pub prompt_regex: Option<Regex>,
66    /// Current emit flag for special output handling
67    pub emit_flag: Option<EmitFlag>,
68    /// Scrape buffer for content extraction
69    pub scrape: Option<String>,
70    /// Current index in scrape buffer
71    pub scrape_ix: usize,
72    /// Terminal reference (placeholder for terminal handle)
73    pub terminal: Option<String>,
74
75    // === Width configuration ===
76    /// User-specified width argument
77    pub width_arg: Option<usize>,
78    /// Full terminal width
79    pub width_full: Option<usize>,
80    /// Whether to wrap text
81    pub width_wrap: bool,
82
83    // === Indentation state ===
84    /// First line indentation level
85    pub first_indent: Option<usize>,
86    /// Whether current content has a newline
87    pub has_newline: bool,
88    /// Current background color code
89    pub bg: String,
90
91    // === Code block state ===
92    /// Buffer for code block content (with highlighting)
93    pub code_buffer: String,
94    /// Raw code buffer (without highlighting)
95    pub code_buffer_raw: String,
96    /// Generation counter for code blocks
97    pub code_gen: usize,
98    /// Language of current code block
99    pub code_language: Option<String>,
100    /// Whether on first line of code block
101    pub code_first_line: bool,
102    /// Indentation level of code block
103    pub code_indent: usize,
104    /// Current line in code block
105    pub code_line: String,
106
107    // === List state ===
108    /// Stack of ordered list numbers for nested lists
109    pub ordered_list_numbers: Vec<usize>,
110    /// Stack of (indent, type) for nested lists
111    pub list_item_stack: Vec<(usize, ListType)>,
112    /// Text indentation for list content
113    pub list_indent_text: usize,
114
115    // === Block/inline state flags ===
116    /// Whether currently in a list
117    pub in_list: bool,
118    /// Current code block type (None if not in code)
119    pub in_code: Option<Code>,
120    /// Whether inline code is active
121    pub inline_code: bool,
122    /// Whether bold formatting is active
123    pub in_bold: bool,
124    /// Whether italic formatting is active
125    pub in_italic: bool,
126    /// Current table state (None if not in table)
127    pub in_table: Option<Code>,
128    /// Whether underline formatting is active
129    pub in_underline: bool,
130    /// Whether strikeout formatting is active
131    pub in_strikeout: bool,
132    /// Current block quote depth
133    pub block_depth: usize,
134    /// Type of current block element
135    pub block_type: Option<BlockType>,
136
137    // === Execution state ===
138    /// Subprocess handle placeholder
139    pub exec_sub: Option<String>,
140    /// Master PTY fd placeholder
141    pub exec_master: Option<i32>,
142    /// Slave PTY fd placeholder
143    pub exec_slave: Option<i32>,
144    /// Keyboard input counter for exec mode
145    pub exec_kb: usize,
146
147    // === Exit/debug state ===
148    /// Exit code
149    pub exit: i32,
150    /// Debug: where input came from
151    pub where_from: Option<String>,
152
153    // === Feature flags from config ===
154    /// Enable link rendering
155    pub links: bool,
156    /// Enable image rendering
157    pub images: bool,
158    /// Enable space-indented code blocks
159    pub code_spaces: bool,
160    /// Enable clipboard integration
161    pub clipboard: bool,
162    /// Enable logging
163    pub logging: bool,
164    /// Timeout for streaming operations (seconds)
165    pub timeout: f64,
166    /// Save brace matching state
167    pub savebrace: bool,
168}
169
170impl Default for ParseState {
171    fn default() -> Self {
172        Self::new()
173    }
174}
175
176impl ParseState {
177    /// Create a new ParseState with default values.
178    ///
179    /// # Example
180    ///
181    /// ```
182    /// use streamdown_core::ParseState;
183    /// let state = ParseState::new();
184    /// assert!(state.first_line);
185    /// ```
186    pub fn new() -> Self {
187        Self {
188            // Input buffer state
189            buffer: Vec::new(),
190            current_line: String::new(),
191            first_line: true,
192            last_line_empty: true,
193
194            // Terminal/execution context
195            is_pty: false,
196            is_exec: false,
197            maybe_prompt: false,
198            prompt_regex: None,
199            emit_flag: None,
200            scrape: None,
201            scrape_ix: 0,
202            terminal: None,
203
204            // Width configuration
205            width_arg: None,
206            width_full: None,
207            width_wrap: false,
208
209            // Indentation state
210            first_indent: None,
211            has_newline: false,
212            bg: BGRESET.to_string(),
213
214            // Code block state
215            code_buffer: String::new(),
216            code_buffer_raw: String::new(),
217            code_gen: 0,
218            code_language: None,
219            code_first_line: false,
220            code_indent: 0,
221            code_line: String::new(),
222
223            // List state
224            ordered_list_numbers: Vec::new(),
225            list_item_stack: Vec::new(),
226            list_indent_text: 0,
227
228            // Block/inline state flags
229            in_list: false,
230            in_code: None,
231            inline_code: false,
232            in_bold: false,
233            in_italic: false,
234            in_table: None,
235            in_underline: false,
236            in_strikeout: false,
237            block_depth: 0,
238            block_type: None,
239
240            // Execution state
241            exec_sub: None,
242            exec_master: None,
243            exec_slave: None,
244            exec_kb: 0,
245
246            // Exit/debug state
247            exit: 0,
248            where_from: None,
249
250            // Feature flags from config
251            links: true,
252            images: true,
253            code_spaces: false,
254            clipboard: true,
255            logging: false,
256            timeout: 0.1,
257            savebrace: true,
258        }
259    }
260
261    /// Returns a snapshot of current inline formatting states.
262    ///
263    /// This captures whether bold, italic, underline, strikeout,
264    /// and inline code are currently active.
265    ///
266    /// # Example
267    ///
268    /// ```
269    /// use streamdown_core::ParseState;
270    /// let mut state = ParseState::new();
271    /// state.in_bold = true;
272    /// let inline = state.current();
273    /// assert!(inline.in_bold);
274    /// ```
275    pub fn current(&self) -> InlineState {
276        InlineState {
277            inline_code: self.inline_code,
278            in_bold: self.in_bold,
279            in_italic: self.in_italic,
280            in_underline: self.in_underline,
281            in_strikeout: self.in_strikeout,
282        }
283    }
284
285    /// Reset all inline formatting states to false.
286    ///
287    /// This is typically called when exiting a block that should
288    /// not carry inline formatting across boundaries.
289    ///
290    /// # Example
291    ///
292    /// ```
293    /// use streamdown_core::ParseState;
294    /// let mut state = ParseState::new();
295    /// state.in_bold = true;
296    /// state.in_italic = true;
297    /// state.reset_inline();
298    /// assert!(!state.in_bold);
299    /// assert!(!state.in_italic);
300    /// ```
301    pub fn reset_inline(&mut self) {
302        self.inline_code = false;
303        self.in_bold = false;
304        self.in_italic = false;
305        self.in_underline = false;
306        self.in_strikeout = false;
307    }
308
309    /// Set the terminal width.
310    ///
311    /// # Arguments
312    ///
313    /// * `width` - The terminal width in columns
314    pub fn set_width(&mut self, width: usize) {
315        self.width_full = Some(width);
316    }
317
318    /// Calculate the full available width with an optional offset.
319    ///
320    /// Returns the full terminal width minus the offset. If no width
321    /// is configured, returns a default of 80.
322    ///
323    /// # Arguments
324    ///
325    /// * `offset` - Number of columns to subtract from full width
326    ///
327    /// # Example
328    ///
329    /// ```
330    /// use streamdown_core::ParseState;
331    /// let mut state = ParseState::new();
332    /// state.set_width(100);
333    /// assert_eq!(state.full_width(10), 90);
334    /// ```
335    pub fn full_width(&self, offset: usize) -> usize {
336        let base = self.width_full.unwrap_or(80);
337        base.saturating_sub(offset)
338    }
339
340    /// Calculate the current usable width for content.
341    ///
342    /// This takes into account:
343    /// - Base terminal width
344    /// - Block quote depth (each level uses 2 columns)
345    /// - List indentation (if `listwidth` is true)
346    ///
347    /// # Arguments
348    ///
349    /// * `listwidth` - Whether to account for list indentation
350    ///
351    /// # Example
352    ///
353    /// ```
354    /// use streamdown_core::ParseState;
355    /// let mut state = ParseState::new();
356    /// state.set_width(80);
357    /// state.block_depth = 2;
358    /// assert_eq!(state.current_width(false), 76); // 80 - (2 * 2)
359    /// ```
360    pub fn current_width(&self, listwidth: bool) -> usize {
361        let base = self.width_full.unwrap_or(80);
362
363        // Subtract block quote indentation (2 chars per level)
364        let block_offset = self.block_depth * 2;
365
366        // Subtract list indentation if requested
367        let list_offset = if listwidth {
368            self.list_indent_text
369        } else {
370            0
371        };
372
373        base.saturating_sub(block_offset + list_offset)
374    }
375
376    /// Generate the left spacing/margin string for current context.
377    ///
378    /// This creates the appropriate leading whitespace and block quote
379    /// markers for the current nesting level.
380    ///
381    /// # Arguments
382    ///
383    /// * `listwidth` - Whether to include list indentation
384    ///
385    /// # Example
386    ///
387    /// ```
388    /// use streamdown_core::ParseState;
389    /// let mut state = ParseState::new();
390    /// state.block_depth = 1;
391    /// let margin = state.space_left(false);
392    /// assert_eq!(margin, "│ ");
393    /// ```
394    pub fn space_left(&self, listwidth: bool) -> String {
395        let mut result = String::new();
396
397        // Add block quote markers
398        for _ in 0..self.block_depth {
399            result.push_str("│ ");
400        }
401
402        // Add list indentation if requested
403        if listwidth && self.list_indent_text > 0 {
404            result.push_str(&" ".repeat(self.list_indent_text));
405        }
406
407        result
408    }
409
410    /// Check if currently inside any code block.
411    pub fn is_in_code(&self) -> bool {
412        self.in_code.is_some()
413    }
414
415    /// Check if currently inside a table.
416    pub fn is_in_table(&self) -> bool {
417        self.in_table.is_some()
418    }
419
420    /// Check if any inline formatting is active.
421    pub fn has_inline_formatting(&self) -> bool {
422        self.inline_code || self.in_bold || self.in_italic || self.in_underline || self.in_strikeout
423    }
424
425    /// Push a new list level onto the stack.
426    ///
427    /// # Arguments
428    ///
429    /// * `indent` - Indentation level of the list
430    /// * `list_type` - Type of list (Bullet or Ordered)
431    pub fn push_list(&mut self, indent: usize, list_type: ListType) {
432        self.list_item_stack.push((indent, list_type));
433        if list_type == ListType::Ordered {
434            self.ordered_list_numbers.push(1);
435        }
436        self.in_list = true;
437    }
438
439    /// Pop the current list level from the stack.
440    ///
441    /// Returns the popped (indent, type) tuple if the stack was non-empty.
442    pub fn pop_list(&mut self) -> Option<(usize, ListType)> {
443        let result = self.list_item_stack.pop();
444        if let Some((_, ListType::Ordered)) = result {
445            self.ordered_list_numbers.pop();
446        }
447        self.in_list = !self.list_item_stack.is_empty();
448        result
449    }
450
451    /// Get the current list depth (nesting level).
452    pub fn list_depth(&self) -> usize {
453        self.list_item_stack.len()
454    }
455
456    /// Get and increment the current ordered list number.
457    ///
458    /// Returns the current number before incrementing.
459    pub fn next_list_number(&mut self) -> Option<usize> {
460        self.ordered_list_numbers.last_mut().map(|n| {
461            let current = *n;
462            *n += 1;
463            current
464        })
465    }
466
467    /// Enter a code block.
468    ///
469    /// # Arguments
470    ///
471    /// * `code_type` - The type of code block (Backtick or Spaces)
472    /// * `language` - Optional language identifier
473    pub fn enter_code_block(&mut self, code_type: Code, language: Option<String>) {
474        self.in_code = Some(code_type);
475        self.code_language = language;
476        self.code_first_line = true;
477        self.code_buffer.clear();
478        self.code_buffer_raw.clear();
479        self.code_gen += 1;
480    }
481
482    /// Exit the current code block.
483    pub fn exit_code_block(&mut self) {
484        self.in_code = None;
485        self.code_language = None;
486        self.code_first_line = false;
487    }
488
489    /// Enter a block quote.
490    ///
491    /// # Arguments
492    ///
493    /// * `block_type` - The type of block (Quote or Think)
494    pub fn enter_block(&mut self, block_type: BlockType) {
495        self.block_depth += 1;
496        self.block_type = Some(block_type);
497    }
498
499    /// Exit one level of block quote.
500    pub fn exit_block(&mut self) {
501        if self.block_depth > 0 {
502            self.block_depth -= 1;
503        }
504        if self.block_depth == 0 {
505            self.block_type = None;
506        }
507    }
508}
509
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh state starts on the first line with nothing active.
    #[test]
    fn test_new_state() {
        let state = ParseState::new();
        assert!(state.first_line);
        assert!(state.last_line_empty);
        assert!(!state.in_bold);
        assert!(state.in_code.is_none());
        assert_eq!(state.block_depth, 0);
        assert_eq!(state.bg, BGRESET);
    }

    /// `Default` goes through `new`, so the non-zero defaults are preserved.
    #[test]
    fn test_default_matches_new() {
        let state = ParseState::default();
        assert!(state.first_line);
        assert!(state.links);
        assert!(!state.in_list);
    }

    #[test]
    fn test_current_inline_state() {
        let mut state = ParseState::new();
        state.in_bold = true;
        state.in_italic = true;

        let inline = state.current();
        assert!(inline.in_bold);
        assert!(inline.in_italic);
        assert!(!inline.inline_code);
    }

    #[test]
    fn test_reset_inline() {
        let mut state = ParseState::new();
        state.in_bold = true;
        state.in_italic = true;
        state.in_underline = true;

        state.reset_inline();

        assert!(!state.in_bold);
        assert!(!state.in_italic);
        assert!(!state.in_underline);
    }

    /// Any single flag counts as active formatting; a reset clears it.
    #[test]
    fn test_has_inline_formatting() {
        let mut state = ParseState::new();
        assert!(!state.has_inline_formatting());

        state.in_strikeout = true;
        assert!(state.has_inline_formatting());

        state.reset_inline();
        assert!(!state.has_inline_formatting());
        assert_eq!(state.current(), InlineState::default());
    }

    #[test]
    fn test_full_width() {
        let mut state = ParseState::new();
        state.set_width(100);

        assert_eq!(state.full_width(0), 100);
        assert_eq!(state.full_width(20), 80);
        assert_eq!(state.full_width(150), 0); // saturating_sub
    }

    /// Without a configured width, both width helpers fall back to 80.
    #[test]
    fn test_width_defaults_to_80() {
        let state = ParseState::new();
        assert_eq!(state.full_width(0), 80);
        assert_eq!(state.full_width(5), 75);
        assert_eq!(state.current_width(false), 80);
        assert_eq!(state.current_width(true), 80);
    }

    #[test]
    fn test_current_width_with_blocks() {
        let mut state = ParseState::new();
        state.set_width(80);

        assert_eq!(state.current_width(false), 80);

        state.block_depth = 2;
        assert_eq!(state.current_width(false), 76); // 80 - 4

        state.list_indent_text = 4;
        assert_eq!(state.current_width(true), 72); // 80 - 4 - 4
        assert_eq!(state.current_width(false), 76); // list indent not counted
    }

    #[test]
    fn test_space_left() {
        let mut state = ParseState::new();

        assert_eq!(state.space_left(false), "");
        assert_eq!(state.space_left(true), "");

        state.block_depth = 2;
        assert_eq!(state.space_left(false), "│ │ ");

        state.list_indent_text = 3;
        assert_eq!(state.space_left(true), "│ │    ");
        assert_eq!(state.space_left(false), "│ │ "); // list indent not included
    }

    #[test]
    fn test_list_operations() {
        let mut state = ParseState::new();

        state.push_list(0, ListType::Ordered);
        assert!(state.in_list);
        assert_eq!(state.list_depth(), 1);
        assert_eq!(state.next_list_number(), Some(1));
        assert_eq!(state.next_list_number(), Some(2));

        state.push_list(2, ListType::Bullet);
        assert_eq!(state.list_depth(), 2);

        state.pop_list();
        assert_eq!(state.list_depth(), 1);
        assert!(state.in_list);

        state.pop_list();
        assert_eq!(state.list_depth(), 0);
        assert!(!state.in_list);
    }

    /// A nested ordered list counts from 1 and does not disturb its parent.
    #[test]
    fn test_nested_ordered_lists() {
        let mut state = ParseState::new();

        state.push_list(0, ListType::Ordered);
        assert_eq!(state.next_list_number(), Some(1));

        state.push_list(2, ListType::Ordered);
        assert_eq!(state.next_list_number(), Some(1));

        state.pop_list();
        assert_eq!(state.next_list_number(), Some(2));
        assert_eq!(state.ordered_list_numbers.len(), 1);
    }

    /// Operations on an empty list stack are harmless no-ops.
    #[test]
    fn test_list_operations_on_empty_stack() {
        let mut state = ParseState::new();

        assert!(state.pop_list().is_none());
        assert!(!state.in_list);
        assert_eq!(state.list_depth(), 0);
        assert_eq!(state.next_list_number(), None);
    }

    #[test]
    fn test_code_block_operations() {
        let mut state = ParseState::new();

        assert!(!state.is_in_code());

        state.enter_code_block(Code::Backtick, Some("rust".to_string()));
        assert!(state.is_in_code());
        assert_eq!(state.code_language, Some("rust".to_string()));
        assert!(state.code_first_line);
        assert_eq!(state.code_gen, 1);

        state.exit_code_block();
        assert!(!state.is_in_code());
        assert!(state.code_language.is_none());
        assert!(!state.code_first_line);
        assert_eq!(state.code_gen, 1); // exiting does not bump the generation
    }

    #[test]
    fn test_block_operations() {
        let mut state = ParseState::new();

        state.enter_block(BlockType::Quote);
        assert_eq!(state.block_depth, 1);
        assert_eq!(state.block_type, Some(BlockType::Quote));

        state.enter_block(BlockType::Quote);
        assert_eq!(state.block_depth, 2);

        state.exit_block();
        assert_eq!(state.block_depth, 1);

        state.exit_block();
        assert_eq!(state.block_depth, 0);
        assert!(state.block_type.is_none());
    }

    /// Exiting with no open block leaves the depth at zero.
    #[test]
    fn test_exit_block_at_zero_depth() {
        let mut state = ParseState::new();

        state.exit_block();

        assert_eq!(state.block_depth, 0);
        assert!(state.block_type.is_none());
    }
}