1use crate::tokenizer::{Token, Tokenizer};
8use streamdown_ansi::codes::digit_to_superscript;
9
/// One piece of parsed inline content.
///
/// A parsed line is represented as a `Vec<InlineElement>`; every variant owns
/// the text it carries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InlineElement {
    /// Text emitted while no formatting flag was active.
    Text(String),

    /// Text emitted while only bold (of bold/italic) was active.
    Bold(String),

    /// Text emitted while italic was active and bold was not.
    Italic(String),

    /// Text emitted while bold and italic were both active.
    BoldItalic(String),

    /// Text emitted while underline was active (and neither bold nor italic).
    Underline(String),

    /// Text emitted while only strikeout was active.
    Strikeout(String),

    /// Contents of an inline code span, without the delimiting backticks.
    Code(String),

    /// A hyperlink: the visible `text` and the target `url`.
    Link { text: String, url: String },

    /// An image: its `alt` text and source `url`.
    Image { alt: String, url: String },

    /// A footnote reference; the parser stores the number as superscript digits.
    Footnote(String),
}
34
/// Formatting flags that are live while a line is being walked token by token.
#[derive(Debug, Clone, Default)]
struct FormatState {
    /// Bold is currently open.
    bold: bool,

    /// Italic is currently open.
    italic: bool,

    /// Underline is currently open.
    underline: bool,

    /// Strikeout is currently open.
    strikeout: bool,

    /// `Some(n)` while inside a code span that was opened by a run of `n` backticks.
    code_backticks: Option<usize>,

    /// Raw text collected so far for the open code span.
    code_buffer: String,
}

impl FormatState {
    /// Builds a state with every flag off, no open code span and an empty code buffer.
    fn new() -> Self {
        FormatState::default()
    }

    /// Returns `true` if any formatting flag is set or a code span is open.
    #[allow(dead_code)] // no caller in the visible code
    fn any_active(&self) -> bool {
        let inside_code = self.code_backticks.is_some();
        [
            self.bold,
            self.italic,
            self.underline,
            self.strikeout,
            inside_code,
        ]
        .iter()
        .any(|&flag| flag)
    }

    /// Switches every flag off, closes any open code span and empties the code buffer.
    fn reset(&mut self) {
        let Self {
            bold,
            italic,
            underline,
            strikeout,
            code_backticks,
            code_buffer,
        } = self;

        *bold = false;
        *italic = false;
        *underline = false;
        *strikeout = false;
        *code_backticks = None;
        // `clear` rather than replacing the String: the allocation is kept.
        code_buffer.clear();
    }
}
75
76#[derive(Debug)]
80pub struct InlineParser {
81 tokenizer: Tokenizer,
82 state: FormatState,
83 pub process_links: bool,
85 pub process_images: bool,
87}
88
89impl Default for InlineParser {
90 fn default() -> Self {
91 Self::new()
92 }
93}
94
95impl InlineParser {
96 pub fn new() -> Self {
98 Self {
99 tokenizer: Tokenizer::new(),
100 state: FormatState::new(),
101 process_links: true,
102 process_images: true,
103 }
104 }
105
106 pub fn with_settings(process_links: bool, process_images: bool) -> Self {
108 Self {
109 tokenizer: Tokenizer::with_settings(process_links, process_images),
110 state: FormatState::new(),
111 process_links,
112 process_images,
113 }
114 }
115
116 pub fn parse(&mut self, line: &str) -> Vec<InlineElement> {
120 let tokens = self.tokenizer.tokenize(line);
121 self.parse_tokens(&tokens)
122 }
123
124 fn parse_tokens(&mut self, tokens: &[Token]) -> Vec<InlineElement> {
126 let mut elements = Vec::new();
127 let mut buffer = String::new();
128 let mut i = 0;
129
130 while i < tokens.len() {
131 let token = &tokens[i];
132
133 if let Some(expected_backticks) = self.state.code_backticks {
135 match token {
136 Token::Backticks(n) if *n == expected_backticks => {
137 let code = std::mem::take(&mut self.state.code_buffer);
139 let code = code.strip_prefix(' ').unwrap_or(&code);
141 let code = code.strip_suffix(' ').unwrap_or(code);
142 elements.push(InlineElement::Code(code.to_string()));
143 self.state.code_backticks = None;
144 }
145 _ => {
146 match token {
148 Token::Text(s) => self.state.code_buffer.push_str(s),
149 Token::Backticks(n) => {
150 self.state.code_buffer.push_str(&"`".repeat(*n));
151 }
152 _ => {
153 if let Some(marker) = token.marker_str() {
154 self.state.code_buffer.push_str(marker);
155 }
156 }
157 }
158 }
159 }
160 i += 1;
161 continue;
162 }
163
164 match token {
165 Token::Text(s) => {
166 buffer.push_str(s);
167 }
168
169 Token::Backticks(n) => {
170 if !buffer.is_empty() {
172 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
173 }
174 self.state.code_backticks = Some(*n);
176 }
177
178 Token::TripleAsterisk => {
179 if !buffer.is_empty() {
181 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
182 }
183
184 if self.state.bold && self.state.italic {
185 self.state.bold = false;
187 self.state.italic = false;
188 } else if !self.state.bold && !self.state.italic {
189 self.state.bold = true;
191 self.state.italic = true;
192 } else {
193 buffer.push_str("***");
195 }
196 }
197
198 Token::DoubleAsterisk => {
199 if !buffer.is_empty() {
200 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
201 }
202 self.state.bold = !self.state.bold;
203 }
204
205 Token::Asterisk => {
206 if !buffer.is_empty() {
207 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
208 }
209 self.state.italic = !self.state.italic;
210 }
211
212 Token::DoubleAsteriskUnderscore => {
213 if !buffer.is_empty() {
215 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
216 }
217 if !self.state.bold {
218 self.state.bold = true;
219 }
220 self.state.italic = !self.state.italic;
221 }
222
223 Token::UnderscoreDoubleAsterisk => {
224 if !buffer.is_empty() {
226 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
227 }
228 self.state.italic = false;
229 self.state.bold = false;
230 }
231
232 Token::TripleUnderscore => {
233 if !buffer.is_empty() {
234 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
235 }
236
237 if self.state.underline && self.state.italic {
238 self.state.underline = false;
239 self.state.italic = false;
240 } else if !self.state.underline && !self.state.italic {
241 self.state.underline = true;
242 self.state.italic = true;
243 } else {
244 buffer.push_str("___");
245 }
246 }
247
248 Token::DoubleUnderscore => {
249 if !buffer.is_empty() {
250 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
251 }
252 self.state.underline = !self.state.underline;
253 }
254
255 Token::Underscore => {
256 let prev_char_is_alnum = i > 0
260 && matches!(&tokens[i - 1], Token::Text(s) if s.chars().last().map(|c| c.is_alphanumeric()).unwrap_or(false));
261 let next_char_is_alnum = i + 1 < tokens.len()
262 && matches!(&tokens[i + 1], Token::Text(s) if s.chars().next().map(|c| c.is_alphanumeric()).unwrap_or(false));
263
264 if prev_char_is_alnum && next_char_is_alnum {
265 buffer.push('_');
267 } else {
268 if !buffer.is_empty() {
269 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
270 }
271 self.state.italic = !self.state.italic;
272 }
273 }
274
275 Token::DoubleTilde => {
276 if !buffer.is_empty() {
277 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
278 }
279 self.state.strikeout = !self.state.strikeout;
280 }
281
282 Token::Link { text, url } => {
283 if !buffer.is_empty() {
284 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
285 }
286 elements.push(InlineElement::Link {
287 text: text.clone(),
288 url: url.clone(),
289 });
290 }
291
292 Token::Image { alt, url } => {
293 if !buffer.is_empty() {
294 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
295 }
296 elements.push(InlineElement::Image {
297 alt: alt.clone(),
298 url: url.clone(),
299 });
300 }
301
302 Token::Footnote(num) => {
303 if !buffer.is_empty() {
304 self.emit_formatted(&mut elements, std::mem::take(&mut buffer));
305 }
306 let superscript = number_to_superscript(*num);
308 elements.push(InlineElement::Footnote(superscript));
309 }
310 }
311
312 i += 1;
313 }
314
315 if !buffer.is_empty() {
317 self.emit_formatted(&mut elements, buffer);
318 }
319
320 if self.state.code_backticks.is_some() {
322 let code = std::mem::take(&mut self.state.code_buffer);
323 if !code.is_empty() {
324 elements.push(InlineElement::Code(code));
325 }
326 self.state.code_backticks = None;
327 }
328
329 self.state.reset();
331
332 elements
333 }
334
335 fn emit_formatted(&self, elements: &mut Vec<InlineElement>, text: String) {
337 if text.is_empty() {
338 return;
339 }
340
341 if self.state.bold && self.state.italic {
342 elements.push(InlineElement::BoldItalic(text));
343 } else if self.state.bold {
344 elements.push(InlineElement::Bold(text));
345 } else if self.state.italic {
346 elements.push(InlineElement::Italic(text));
347 } else if self.state.underline {
348 elements.push(InlineElement::Underline(text));
349 } else if self.state.strikeout {
350 elements.push(InlineElement::Strikeout(text));
351 } else {
352 elements.push(InlineElement::Text(text));
353 }
354 }
355
356 pub fn reset(&mut self) {
358 self.state.reset();
359 }
360}
361
362fn number_to_superscript(num: u32) -> String {
364 num.to_string()
365 .chars()
366 .map(|c| {
367 let digit = c.to_digit(10).unwrap_or(0) as u8;
368 digit_to_superscript(digit)
369 })
370 .collect()
371}
372
373pub fn format_line(line: &str, process_links: bool, process_images: bool) -> String {
378 use streamdown_ansi::codes::*;
379 use streamdown_ansi::style::*;
380
381 let mut parser = InlineParser::with_settings(process_links, process_images);
382 let elements = parser.parse(line);
383
384 let mut result = String::new();
385
386 for element in elements {
387 match element {
388 InlineElement::Text(s) => result.push_str(&s),
389 InlineElement::Bold(s) => {
390 result.push_str(BOLD.0);
391 result.push_str(&s);
392 result.push_str(BOLD.1);
393 }
394 InlineElement::Italic(s) => {
395 result.push_str(ITALIC.0);
396 result.push_str(&s);
397 result.push_str(ITALIC.1);
398 }
399 InlineElement::BoldItalic(s) => {
400 result.push_str(BOLD.0);
401 result.push_str(ITALIC.0);
402 result.push_str(&s);
403 result.push_str(ITALIC.1);
404 result.push_str(BOLD.1);
405 }
406 InlineElement::Underline(s) => {
407 result.push_str(UNDERLINE.0);
408 result.push_str(&s);
409 result.push_str(UNDERLINE.1);
410 }
411 InlineElement::Strikeout(s) => {
412 result.push_str(STRIKEOUT.0);
413 result.push_str(&s);
414 result.push_str(STRIKEOUT.1);
415 }
416 InlineElement::Code(s) => {
417 result.push_str(DIM_ON);
418 result.push_str(&s);
419 result.push_str(DIM_OFF);
420 }
421 InlineElement::Link { text, url } => {
422 result.push_str(LINK.0);
423 result.push_str(&url);
424 result.push('\x1b');
425 result.push_str(UNDERLINE.0);
426 result.push_str(&text);
427 result.push_str(UNDERLINE.1);
428 result.push_str(LINK.1);
429 }
430 InlineElement::Image { alt, url: _ } => {
431 result.push_str(DIM_ON);
432 result.push_str("[\u{1F5BC} ");
433 result.push_str(&alt);
434 result.push(']');
435 result.push_str(DIM_OFF);
436 }
437 InlineElement::Footnote(s) => {
438 result.push_str(&s);
439 }
440 }
441 }
442
443 result
444}
445
/// Unit tests for the inline parser and the ANSI line formatter.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `line` with a freshly constructed default parser.
    fn parse_line(line: &str) -> Vec<InlineElement> {
        InlineParser::new().parse(line)
    }

    /// Shorthand for a plain-text element.
    fn text(s: &str) -> InlineElement {
        InlineElement::Text(s.to_string())
    }

    #[test]
    fn test_parse_plain_text() {
        assert_eq!(parse_line("Hello world"), vec![text("Hello world")]);
    }

    #[test]
    fn test_parse_bold() {
        assert_eq!(
            parse_line("Hello **bold** world"),
            vec![
                text("Hello "),
                InlineElement::Bold("bold".to_string()),
                text(" world"),
            ]
        );
    }

    #[test]
    fn test_parse_italic() {
        assert_eq!(
            parse_line("Hello *italic* world"),
            vec![
                text("Hello "),
                InlineElement::Italic("italic".to_string()),
                text(" world"),
            ]
        );
    }

    #[test]
    fn test_parse_bold_italic() {
        assert_eq!(
            parse_line("Hello ***bold italic*** world"),
            vec![
                text("Hello "),
                InlineElement::BoldItalic("bold italic".to_string()),
                text(" world"),
            ]
        );
    }

    #[test]
    fn test_parse_strikethrough() {
        assert_eq!(
            parse_line("Hello ~~strike~~ world"),
            vec![
                text("Hello "),
                InlineElement::Strikeout("strike".to_string()),
                text(" world"),
            ]
        );
    }

    #[test]
    fn test_parse_inline_code() {
        assert_eq!(
            parse_line("Use `code` here"),
            vec![
                text("Use "),
                InlineElement::Code("code".to_string()),
                text(" here"),
            ]
        );
    }

    #[test]
    fn test_parse_double_backtick_code() {
        // Single backticks inside a double-backtick span are literal content.
        assert_eq!(
            parse_line("Use `` `backticks` `` here"),
            vec![
                text("Use "),
                InlineElement::Code("`backticks`".to_string()),
                text(" here"),
            ]
        );
    }

    #[test]
    fn test_parse_link() {
        let elements = parse_line("Check [this](http://example.com) out");

        assert!(elements.iter().any(|e| matches!(
            e,
            InlineElement::Link { text, url }
                if text == "this" && url == "http://example.com"
        )));
    }

    #[test]
    fn test_parse_image() {
        // The input must contain image markup for the assertion below to hold.
        let elements = parse_line("See ![alt text](http://img.png) here");

        assert!(elements.iter().any(|e| matches!(
            e,
            InlineElement::Image { alt, url }
                if alt == "alt text" && url == "http://img.png"
        )));
    }

    #[test]
    fn test_parse_footnote() {
        let elements = parse_line("Some text[^1] here");

        assert!(elements
            .iter()
            .any(|e| matches!(e, InlineElement::Footnote(s) if s == "¹")));
    }

    #[test]
    fn test_parse_footnote_multi_digit() {
        let elements = parse_line("Reference[^42]");

        assert!(elements
            .iter()
            .any(|e| matches!(e, InlineElement::Footnote(s) if s == "⁴²")));
    }

    #[test]
    fn test_underscore_in_word() {
        // Underscores between alphanumerics must not start italics.
        assert_eq!(
            parse_line("some_variable_name"),
            vec![text("some_variable_name")]
        );
    }

    #[test]
    fn test_underscore_in_word_with_surrounding_text() {
        assert_eq!(
            parse_line("use sem_search tool"),
            vec![text("use sem_search tool")]
        );
    }

    #[test]
    fn test_underscore_at_start_of_text() {
        assert_eq!(parse_line("sem_search"), vec![text("sem_search")]);
    }

    #[test]
    fn test_underscore_at_end_of_text() {
        assert_eq!(
            parse_line("sem_search is useful"),
            vec![text("sem_search is useful")]
        );
    }

    #[test]
    fn test_multiple_underscores_in_text() {
        assert_eq!(
            parse_line("use my_var_name here"),
            vec![text("use my_var_name here")]
        );
    }

    #[test]
    fn test_underscore_italic_still_works() {
        // Underscores at word boundaries still toggle italics.
        assert_eq!(
            parse_line("this is _italic_ text"),
            vec![
                text("this is "),
                InlineElement::Italic("italic".to_string()),
                text(" text"),
            ]
        );
    }

    #[test]
    fn test_underscore_italic_at_boundaries() {
        assert_eq!(
            parse_line("word _italic_"),
            vec![text("word "), InlineElement::Italic("italic".to_string())]
        );
    }

    #[test]
    fn test_mixed_underscore_scenarios() {
        // Intraword and emphasis underscores on the same line.
        assert_eq!(
            parse_line("use my_func for _emphasis_"),
            vec![
                text("use my_func for "),
                InlineElement::Italic("emphasis".to_string()),
            ]
        );
    }

    #[test]
    fn test_format_line() {
        let result = format_line("Hello **bold** world", true, true);
        assert!(result.contains("bold"));
        assert!(result.contains("\x1b[1m"));
        assert!(result.contains("\x1b[22m"));
    }

    #[test]
    fn test_number_to_superscript() {
        assert_eq!(number_to_superscript(0), "⁰");
        assert_eq!(number_to_superscript(1), "¹");
        assert_eq!(number_to_superscript(2), "²");
        assert_eq!(number_to_superscript(42), "⁴²");
        assert_eq!(number_to_superscript(123), "¹²³");
    }
}