Skip to main content

speechmarkdown_rust/formatters/
text.rs

1use crate::ast::{AstNode, NodeType};
2use crate::error::Result;
3use crate::formatters::base::Formatter;
4
/// Plain text formatter - renders a Speech Markdown AST with all markup stripped.
///
/// `Debug` and `Clone` are derived so the formatter can be logged and
/// duplicated like any other small configuration value.
#[derive(Debug, Clone)]
pub struct TextFormatter {
    /// Selects what an `EmptyLine` node emits: `"\n\n"` when `true`,
    /// `"\n"` when `false`.
    preserve_empty_lines: bool,
}
9
10impl TextFormatter {
11    pub fn new() -> Self {
12        Self {
13            preserve_empty_lines: true,
14        }
15    }
16
17    pub fn with_options(preserve_empty_lines: bool) -> Self {
18        Self {
19            preserve_empty_lines,
20        }
21    }
22}
23
24impl Default for TextFormatter {
25    fn default() -> Self {
26        Self::new()
27    }
28}
29
30impl Formatter for TextFormatter {
31    fn format(&self, ast: &AstNode) -> Result<String> {
32        let mut result = Vec::new();
33        self.format_node_recursive(ast, &mut result);
34        let text = result.join("");
35
36        // Clean up whitespace
37        let text = self.clean_whitespace(&text);
38
39        Ok(text)
40    }
41
42    fn format_node(&self, node: &AstNode) -> Result<String> {
43        let mut result = Vec::new();
44        self.format_node_recursive(node, &mut result);
45        Ok(result.join(""))
46    }
47}
48
49impl TextFormatter {
50    fn format_node_recursive(&self, node: &AstNode, result: &mut Vec<String>) {
51        match node.node_type {
52            // Structural nodes - process children
53            NodeType::Document | NodeType::Paragraph | NodeType::SimpleLine => {
54                for child in &node.children {
55                    self.format_node_recursive(child, result);
56                }
57            }
58
59            // Empty lines
60            NodeType::EmptyLine => {
61                if self.preserve_empty_lines {
62                    result.push("\n\n".to_string());
63                } else {
64                    result.push("\n".to_string());
65                }
66            }
67
68            // Plain text content
69            NodeType::PlainText | NodeType::PlainTextSpecialChars | NodeType::PlainTextEmphasis => {
70                result.push(node.text.clone());
71            }
72
73            // Breaks - add space
74            NodeType::ShortBreak | NodeType::Break => {
75                result.push(" ".to_string());
76            }
77
78            // Emphasis - just use the text
79            NodeType::ShortEmphasisModerate
80            | NodeType::ShortEmphasisStrong
81            | NodeType::ShortEmphasisNone
82            | NodeType::ShortEmphasisReduced => {
83                result.push(node.text.clone());
84            }
85
86            // Text modifiers - extract the text content
87            NodeType::TextModifier => {
88                // For text modifiers, the text is stored in the node's text field
89                result.push(node.text.clone());
90            }
91
92            // IPA - use the text content (pronunciation)
93            NodeType::ShortIpa => {
94                result.push(node.text.clone());
95            }
96            NodeType::BareIpa => {
97                if let Some(ph) = node.attributes.get("ph") {
98                    result.push(ph.clone());
99                } else {
100                    result.push(node.text.clone());
101                }
102            }
103
104            // Substitution - use the alias if available, otherwise the text
105            NodeType::ShortSub => {
106                result.push(node.text.clone());
107            }
108
109            // Audio - no output in plain text
110            NodeType::Audio => {
111                // Audio elements produce no text output
112            }
113
114            // Mark tags - no output
115            NodeType::Mark => {
116                // Mark tags produce no output
117            }
118
119            // Modifiers are handled as part of text modifiers
120            NodeType::Emphasis
121            | NodeType::Voice
122            | NodeType::Lang
123            | NodeType::Rate
124            | NodeType::Pitch
125            | NodeType::Volume
126            | NodeType::Whisper
127            | NodeType::Excited
128            | NodeType::Disappointed
129            | NodeType::Newscaster
130            | NodeType::Dj
131            | NodeType::Date
132            | NodeType::Time
133            | NodeType::Number
134            | NodeType::Ordinal
135            | NodeType::Characters
136            | NodeType::Fraction
137            | NodeType::Telephone
138            | NodeType::Unit
139            | NodeType::Address
140            | NodeType::Interjection
141            | NodeType::Expletive
142            | NodeType::Ipa
143            | NodeType::Sub => {
144                // These are handled as part of text modifiers, not standalone
145            }
146
147            // Section - process children
148            NodeType::Section => {
149                for child in &node.children {
150                    self.format_node_recursive(child, result);
151                }
152            }
153        }
154    }
155
156    fn clean_whitespace(&self, text: &str) -> String {
157        let lines: Vec<&str> = text.lines().collect();
158        let cleaned: Vec<String> = lines
159            .iter()
160            .map(|line| line.split_whitespace().collect::<Vec<_>>().join(" "))
161            .filter(|line| !line.is_empty())
162            .collect();
163        let result = cleaned.join("\n");
164        result.trim().to_string()
165    }
166}
167
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::SpeechMarkdownParser;

    /// Parses `input` and renders it with a default `TextFormatter`,
    /// panicking if either step fails.
    fn render(input: &str) -> String {
        let ast = SpeechMarkdownParser::parse(input).unwrap();
        TextFormatter::new().format(&ast).unwrap()
    }

    #[test]
    fn test_format_plain_text() {
        assert_eq!(render("Hello world"), "Hello world");
    }

    #[test]
    fn test_format_with_breaks() {
        // The break marker collapses to the single space between the words.
        assert_eq!(render("Sample [2s] text"), "Sample text");
    }

    #[test]
    fn test_format_with_emphasis() {
        assert_eq!(render("++strong emphasis++"), "strong emphasis");
    }

    #[test]
    fn test_format_with_text_modifier() {
        assert_eq!(render("(text)[voice:\"Kendra\"]"), "text");
    }

    #[test]
    fn test_format_with_substitution() {
        // Only the braced text is expected in the output.
        assert_eq!(render("{Al}aluminum"), "Al");
    }

    #[test]
    fn test_format_complex_sentence() {
        let rendered = render(
            "Why do you keep switching voices (from one)[voice:\"Brian\"] to (the other)[voice:\"Kendra\"]?",
        );

        assert_eq!(
            rendered,
            "Why do you keep switching voices from one to the other?"
        );
    }

    #[test]
    fn test_format_with_audio() {
        // Audio emits nothing; the surrounding spaces are collapsed to one.
        let rendered = render("Hello ![sound](\"https://example.com/audio.mp3\") world");

        assert_eq!(rendered, "Hello world");
    }
}