streamdown_parser/
inline.rs

1//! Inline markdown parser.
2//!
3//! This module handles parsing of inline markdown formatting including
4//! bold, italic, underline, strikethrough, inline code, links, images,
5//! and footnotes.
6
7use crate::tokenizer::{not_text, Token, Tokenizer};
8use streamdown_ansi::codes::digit_to_superscript;
9
/// One piece of a parsed markdown line.
///
/// The inline parser turns a line into a flat sequence of these elements;
/// each variant owns the text it carries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InlineElement {
    /// Text with no formatting applied.
    Text(String),
    /// Text rendered in bold.
    Bold(String),
    /// Text rendered in italics.
    Italic(String),
    /// Text rendered both bold and italic.
    BoldItalic(String),
    /// Text rendered with an underline.
    Underline(String),
    /// Text rendered struck through.
    Strikeout(String),
    /// The contents of an inline code span.
    Code(String),
    /// A hyperlink.
    Link {
        /// Visible link text.
        text: String,
        /// Link destination.
        url: String,
    },
    /// An image reference.
    Image {
        /// Alternative text of the image.
        alt: String,
        /// Image location.
        url: String,
    },
    /// A footnote reference, stored as its superscript rendering.
    Footnote(String),
}
34
/// Formatting toggles that are live while a line is being parsed.
#[derive(Debug, Clone, Default)]
struct FormatState {
    /// `true` while bold is switched on.
    bold: bool,
    /// `true` while italic is switched on.
    italic: bool,
    /// `true` while underline is switched on.
    underline: bool,
    /// `true` while strikeout is switched on.
    strikeout: bool,
    /// `Some(n)` while inside an inline code span opened by `n` backticks.
    code_backticks: Option<usize>,
    /// Text collected so far for the open inline code span.
    code_buffer: String,
}

impl FormatState {
    /// Build a state with every toggle off and an empty code buffer.
    fn new() -> Self {
        Default::default()
    }

    /// Report whether any formatting toggle or a code span is currently on.
    #[allow(dead_code)]
    fn any_active(&self) -> bool {
        let toggles = [self.bold, self.italic, self.underline, self.strikeout];
        toggles.contains(&true) || self.code_backticks.is_some()
    }

    /// Switch everything off and empty the code buffer.
    fn reset(&mut self) {
        // Destructuring keeps this in step with the struct: adding a field
        // without handling it here becomes a compile error.
        let Self {
            bold,
            italic,
            underline,
            strikeout,
            code_backticks,
            code_buffer,
        } = self;

        for flag in [bold, italic, underline, strikeout] {
            *flag = false;
        }
        *code_backticks = None;
        code_buffer.clear();
    }
}
71
72/// Inline markdown parser.
73///
74/// Parses inline formatting and returns structured elements.
75#[derive(Debug)]
76pub struct InlineParser {
77    tokenizer: Tokenizer,
78    state: FormatState,
79    /// Whether to process links
80    pub process_links: bool,
81    /// Whether to process images  
82    pub process_images: bool,
83}
84
85impl Default for InlineParser {
86    fn default() -> Self {
87        Self::new()
88    }
89}
90
91impl InlineParser {
92    /// Create a new inline parser.
93    pub fn new() -> Self {
94        Self {
95            tokenizer: Tokenizer::new(),
96            state: FormatState::new(),
97            process_links: true,
98            process_images: true,
99        }
100    }
101
102    /// Create parser with specific settings.
103    pub fn with_settings(process_links: bool, process_images: bool) -> Self {
104        Self {
105            tokenizer: Tokenizer::with_settings(process_links, process_images),
106            state: FormatState::new(),
107            process_links,
108            process_images,
109        }
110    }
111
112    /// Parse a line of markdown and return inline elements.
113    ///
114    /// This is the main entry point for inline parsing.
115    pub fn parse(&mut self, line: &str) -> Vec<InlineElement> {
116        let tokens = self.tokenizer.tokenize(line);
117        self.parse_tokens(&tokens)
118    }
119
120    /// Parse a sequence of tokens into inline elements.
121    fn parse_tokens(&mut self, tokens: &[Token]) -> Vec<InlineElement> {
122        let mut elements = Vec::new();
123        let mut buffer = String::new();
124        let mut i = 0;
125
126        while i < tokens.len() {
127            let token = &tokens[i];
128
129            // If we're in code mode, handle specially
130            if let Some(expected_backticks) = self.state.code_backticks {
131                match token {
132                    Token::Backticks(n) if *n == expected_backticks => {
133                        // End of inline code
134                        let code = std::mem::take(&mut self.state.code_buffer);
135                        // Trim single leading/trailing space (Markdown spec)
136                        let code = code.strip_prefix(' ').unwrap_or(&code);
137                        let code = code.strip_suffix(' ').unwrap_or(code);
138                        elements.push(InlineElement::Code(code.to_string()));
139                        self.state.code_backticks = None;
140                    }
141                    _ => {
142                        // Add to code buffer
143                        match token {
144                            Token::Text(s) => self.state.code_buffer.push_str(s),
145                            Token::Backticks(n) => {
146                                self.state.code_buffer.push_str(&"`".repeat(*n));
147                            }
148                            _ => {
149                                if let Some(marker) = token.marker_str() {
150                                    self.state.code_buffer.push_str(marker);
151                                }
152                            }
153                        }
154                    }
155                }
156                i += 1;
157                continue;
158            }
159
160            match token {
161                Token::Text(s) => {
162                    buffer.push_str(s);
163                }
164
165                Token::Backticks(n) => {
166                    // Flush buffer
167                    if !buffer.is_empty() {
168                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
169                    }
170                    // Start inline code
171                    self.state.code_backticks = Some(*n);
172                }
173
174                Token::TripleAsterisk => {
175                    // Flush buffer first
176                    if !buffer.is_empty() {
177                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
178                    }
179
180                    if self.state.bold && self.state.italic {
181                        // End both
182                        self.state.bold = false;
183                        self.state.italic = false;
184                    } else if !self.state.bold && !self.state.italic {
185                        // Start both
186                        self.state.bold = true;
187                        self.state.italic = true;
188                    } else {
189                        // Mixed state - just emit as text
190                        buffer.push_str("***");
191                    }
192                }
193
194                Token::DoubleAsterisk => {
195                    if !buffer.is_empty() {
196                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
197                    }
198                    self.state.bold = !self.state.bold;
199                }
200
201                Token::Asterisk => {
202                    if !buffer.is_empty() {
203                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
204                    }
205                    self.state.italic = !self.state.italic;
206                }
207
208                Token::DoubleAsteriskUnderscore => {
209                    // **_ = start bold + start italic
210                    if !buffer.is_empty() {
211                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
212                    }
213                    if !self.state.bold {
214                        self.state.bold = true;
215                    }
216                    self.state.italic = !self.state.italic;
217                }
218
219                Token::UnderscoreDoubleAsterisk => {
220                    // _** = end italic + end bold
221                    if !buffer.is_empty() {
222                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
223                    }
224                    self.state.italic = false;
225                    self.state.bold = false;
226                }
227
228                Token::TripleUnderscore => {
229                    if !buffer.is_empty() {
230                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
231                    }
232
233                    if self.state.underline && self.state.italic {
234                        self.state.underline = false;
235                        self.state.italic = false;
236                    } else if !self.state.underline && !self.state.italic {
237                        self.state.underline = true;
238                        self.state.italic = true;
239                    } else {
240                        buffer.push_str("___");
241                    }
242                }
243
244                Token::DoubleUnderscore => {
245                    if !buffer.is_empty() {
246                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
247                    }
248                    self.state.underline = !self.state.underline;
249                }
250
251                Token::Underscore => {
252                    // Check context - underscore in middle of word shouldn't trigger
253                    let prev_is_text = i > 0
254                        && matches!(&tokens[i - 1], Token::Text(s) if !not_text(s));
255                    let next_is_text = i + 1 < tokens.len()
256                        && matches!(&tokens[i + 1], Token::Text(s) if !not_text(s));
257
258                    if prev_is_text && next_is_text {
259                        // Underscore in middle of word - treat as text
260                        buffer.push('_');
261                    } else {
262                        if !buffer.is_empty() {
263                            self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
264                        }
265                        self.state.italic = !self.state.italic;
266                    }
267                }
268
269                Token::DoubleTilde => {
270                    if !buffer.is_empty() {
271                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
272                    }
273                    self.state.strikeout = !self.state.strikeout;
274                }
275
276                Token::Link { text, url } => {
277                    if !buffer.is_empty() {
278                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
279                    }
280                    elements.push(InlineElement::Link {
281                        text: text.clone(),
282                        url: url.clone(),
283                    });
284                }
285
286                Token::Image { alt, url } => {
287                    if !buffer.is_empty() {
288                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
289                    }
290                    elements.push(InlineElement::Image {
291                        alt: alt.clone(),
292                        url: url.clone(),
293                    });
294                }
295
296                Token::Footnote(num) => {
297                    if !buffer.is_empty() {
298                        self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
299                    }
300                    // Convert number to superscript
301                    let superscript = number_to_superscript(*num);
302                    elements.push(InlineElement::Footnote(superscript));
303                }
304            }
305
306            i += 1;
307        }
308
309        // Flush remaining buffer
310        if !buffer.is_empty() {
311            self.emit_formatted(&mut elements, buffer);
312        }
313
314        // Flush any unclosed code block
315        if self.state.code_backticks.is_some() {
316            let code = std::mem::take(&mut self.state.code_buffer);
317            if !code.is_empty() {
318                elements.push(InlineElement::Code(code));
319            }
320            self.state.code_backticks = None;
321        }
322
323        // Reset state for next line
324        self.state.reset();
325
326        elements
327    }
328
329    /// Emit formatted text based on current state.
330    fn emit_formatted(&self, elements: &mut Vec<InlineElement>, text: String) {
331        if text.is_empty() {
332            return;
333        }
334
335        if self.state.bold && self.state.italic {
336            elements.push(InlineElement::BoldItalic(text));
337        } else if self.state.bold {
338            elements.push(InlineElement::Bold(text));
339        } else if self.state.italic {
340            elements.push(InlineElement::Italic(text));
341        } else if self.state.underline {
342            elements.push(InlineElement::Underline(text));
343        } else if self.state.strikeout {
344            elements.push(InlineElement::Strikeout(text));
345        } else {
346            elements.push(InlineElement::Text(text));
347        }
348    }
349
350    /// Reset the parser state.
351    pub fn reset(&mut self) {
352        self.state.reset();
353    }
354}
355
356/// Convert a number to superscript string.
357fn number_to_superscript(num: u32) -> String {
358    num.to_string()
359        .chars()
360        .map(|c| {
361            let digit = c.to_digit(10).unwrap_or(0) as u8;
362            digit_to_superscript(digit)
363        })
364        .collect()
365}
366
367/// Format a line with inline markdown.
368///
369/// This is a convenience function that parses a line and returns
370/// the formatted result as ANSI-styled text.
371pub fn format_line(line: &str, process_links: bool, process_images: bool) -> String {
372    use streamdown_ansi::codes::*;
373    use streamdown_ansi::style::*;
374
375    let mut parser = InlineParser::with_settings(process_links, process_images);
376    let elements = parser.parse(line);
377
378    let mut result = String::new();
379
380    for element in elements {
381        match element {
382            InlineElement::Text(s) => result.push_str(&s),
383            InlineElement::Bold(s) => {
384                result.push_str(BOLD.0);
385                result.push_str(&s);
386                result.push_str(BOLD.1);
387            }
388            InlineElement::Italic(s) => {
389                result.push_str(ITALIC.0);
390                result.push_str(&s);
391                result.push_str(ITALIC.1);
392            }
393            InlineElement::BoldItalic(s) => {
394                result.push_str(BOLD.0);
395                result.push_str(ITALIC.0);
396                result.push_str(&s);
397                result.push_str(ITALIC.1);
398                result.push_str(BOLD.1);
399            }
400            InlineElement::Underline(s) => {
401                result.push_str(UNDERLINE.0);
402                result.push_str(&s);
403                result.push_str(UNDERLINE.1);
404            }
405            InlineElement::Strikeout(s) => {
406                result.push_str(STRIKEOUT.0);
407                result.push_str(&s);
408                result.push_str(STRIKEOUT.1);
409            }
410            InlineElement::Code(s) => {
411                result.push_str(DIM_ON);
412                result.push_str(&s);
413                result.push_str(DIM_OFF);
414            }
415            InlineElement::Link { text, url } => {
416                result.push_str(LINK.0);
417                result.push_str(&url);
418                result.push_str("\x1b");
419                result.push_str(UNDERLINE.0);
420                result.push_str(&text);
421                result.push_str(UNDERLINE.1);
422                result.push_str(LINK.1);
423            }
424            InlineElement::Image { alt, url: _ } => {
425                result.push_str(DIM_ON);
426                result.push_str("[\u{1F5BC} ");
427                result.push_str(&alt);
428                result.push_str("]");
429                result.push_str(DIM_OFF);
430            }
431            InlineElement::Footnote(s) => {
432                result.push_str(&s);
433            }
434        }
435    }
436
437    result
438}
439
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `line` with a fresh default parser.
    fn parse(line: &str) -> Vec<InlineElement> {
        InlineParser::new().parse(line)
    }

    /// Shorthand for a plain-text element.
    fn text(s: &str) -> InlineElement {
        InlineElement::Text(s.to_string())
    }

    // Plain input comes back as a single text element.
    #[test]
    fn test_parse_plain_text() {
        assert_eq!(parse("Hello world"), vec![text("Hello world")]);
    }

    // `**…**` produces a bold element between the surrounding text.
    #[test]
    fn test_parse_bold() {
        let expected = vec![
            text("Hello "),
            InlineElement::Bold("bold".to_string()),
            text(" world"),
        ];
        assert_eq!(parse("Hello **bold** world"), expected);
    }

    // `*…*` produces an italic element.
    #[test]
    fn test_parse_italic() {
        let expected = vec![
            text("Hello "),
            InlineElement::Italic("italic".to_string()),
            text(" world"),
        ];
        assert_eq!(parse("Hello *italic* world"), expected);
    }

    // `***…***` produces a combined bold-italic element.
    #[test]
    fn test_parse_bold_italic() {
        let expected = vec![
            text("Hello "),
            InlineElement::BoldItalic("bold italic".to_string()),
            text(" world"),
        ];
        assert_eq!(parse("Hello ***bold italic*** world"), expected);
    }

    // `~~…~~` produces a strikeout element.
    #[test]
    fn test_parse_strikethrough() {
        let expected = vec![
            text("Hello "),
            InlineElement::Strikeout("strike".to_string()),
            text(" world"),
        ];
        assert_eq!(parse("Hello ~~strike~~ world"), expected);
    }

    // Single backticks delimit inline code.
    #[test]
    fn test_parse_inline_code() {
        let expected = vec![
            text("Use "),
            InlineElement::Code("code".to_string()),
            text(" here"),
        ];
        assert_eq!(parse("Use `code` here"), expected);
    }

    // Double backticks allow single backticks inside the code span, and the
    // padding spaces are removed.
    #[test]
    fn test_parse_double_backtick_code() {
        let expected = vec![
            text("Use "),
            InlineElement::Code("`backticks`".to_string()),
            text(" here"),
        ];
        assert_eq!(parse("Use `` `backticks` `` here"), expected);
    }

    // A link yields a `Link` element carrying its text and URL.
    #[test]
    fn test_parse_link() {
        let elements = parse("Check [this](http://example.com) out");
        let found = elements.iter().any(|e| {
            matches!(
                e,
                InlineElement::Link { text, url }
                if text == "this" && url == "http://example.com"
            )
        });
        assert!(found);
    }

    // An image yields an `Image` element carrying its alt text and URL.
    #[test]
    fn test_parse_image() {
        let elements = parse("See ![alt text](http://img.png) here");
        let found = elements.iter().any(|e| {
            matches!(
                e,
                InlineElement::Image { alt, url }
                if alt == "alt text" && url == "http://img.png"
            )
        });
        assert!(found);
    }

    // A footnote reference is rendered as a superscript digit.
    #[test]
    fn test_parse_footnote() {
        let elements = parse("Some text[^1] here");
        let found = elements
            .iter()
            .any(|e| matches!(e, InlineElement::Footnote(s) if s == "¹"));
        assert!(found);
    }

    // Multi-digit footnote numbers map digit by digit.
    #[test]
    fn test_parse_footnote_multi_digit() {
        let elements = parse("Reference[^42]");
        let found = elements
            .iter()
            .any(|e| matches!(e, InlineElement::Footnote(s) if s == "⁴²"));
        assert!(found);
    }

    // Underscores in the middle of a word must not trigger formatting.
    #[test]
    fn test_underscore_in_word() {
        assert_eq!(parse("some_variable_name"), vec![text("some_variable_name")]);
    }

    // The convenience formatter wraps bold text in the bold on/off codes.
    #[test]
    fn test_format_line() {
        let rendered = format_line("Hello **bold** world", true, true);
        assert!(rendered.contains("bold"));
        assert!(rendered.contains("\x1b[1m")); // Bold on
        assert!(rendered.contains("\x1b[22m")); // Bold off
    }

    // Each decimal digit maps to its superscript counterpart.
    #[test]
    fn test_number_to_superscript() {
        let cases = [(0, "⁰"), (1, "¹"), (2, "²"), (42, "⁴²"), (123, "¹²³")];
        for (input, expected) in cases {
            assert_eq!(number_to_superscript(input), expected);
        }
    }
}