speechmarkdown_rust/formatters/
text.rs1use crate::ast::{AstNode, NodeType};
2use crate::error::Result;
3use crate::formatters::base::Formatter;
4
/// Formatter that renders a Speech Markdown AST as plain text.
///
/// The only configuration is the empty-line policy; see
/// [`TextFormatter::with_options`].
#[derive(Debug, Clone)]
pub struct TextFormatter {
    /// Selects what a `NodeType::EmptyLine` node contributes to the output:
    /// a blank-line separator (`"\n\n"`) when `true`, a single newline
    /// (`"\n"`) when `false`.
    preserve_empty_lines: bool,
}

impl TextFormatter {
    /// Creates a formatter with the default configuration
    /// (`preserve_empty_lines` set to `true`).
    pub fn new() -> Self {
        // Delegate so the default policy is spelled out in exactly one place.
        Self::with_options(true)
    }

    /// Creates a formatter with an explicit empty-line policy.
    ///
    /// `preserve_empty_lines` is stored as-is; see the field documentation
    /// for how it is interpreted.
    pub fn with_options(preserve_empty_lines: bool) -> Self {
        Self {
            preserve_empty_lines,
        }
    }
}

impl Default for TextFormatter {
    /// Equivalent to [`TextFormatter::new`].
    fn default() -> Self {
        Self::new()
    }
}
29
30impl Formatter for TextFormatter {
31 fn format(&self, ast: &AstNode) -> Result<String> {
32 let mut result = Vec::new();
33 self.format_node_recursive(ast, &mut result);
34 let text = result.join("");
35
36 let text = self.clean_whitespace(&text);
38
39 Ok(text)
40 }
41
42 fn format_node(&self, node: &AstNode) -> Result<String> {
43 let mut result = Vec::new();
44 self.format_node_recursive(node, &mut result);
45 Ok(result.join(""))
46 }
47}
48
49impl TextFormatter {
50 fn format_node_recursive(&self, node: &AstNode, result: &mut Vec<String>) {
51 match node.node_type {
52 NodeType::Document | NodeType::Paragraph | NodeType::SimpleLine => {
54 for child in &node.children {
55 self.format_node_recursive(child, result);
56 }
57 }
58
59 NodeType::EmptyLine => {
61 if self.preserve_empty_lines {
62 result.push("\n\n".to_string());
63 } else {
64 result.push("\n".to_string());
65 }
66 }
67
68 NodeType::PlainText | NodeType::PlainTextSpecialChars | NodeType::PlainTextEmphasis => {
70 result.push(node.text.clone());
71 }
72
73 NodeType::ShortBreak | NodeType::Break => {
75 result.push(" ".to_string());
76 }
77
78 NodeType::ShortEmphasisModerate
80 | NodeType::ShortEmphasisStrong
81 | NodeType::ShortEmphasisNone
82 | NodeType::ShortEmphasisReduced => {
83 result.push(node.text.clone());
84 }
85
86 NodeType::TextModifier => {
88 result.push(node.text.clone());
90 }
91
92 NodeType::ShortIpa => {
94 result.push(node.text.clone());
95 }
96 NodeType::BareIpa => {
97 if let Some(ph) = node.attributes.get("ph") {
98 result.push(ph.clone());
99 } else {
100 result.push(node.text.clone());
101 }
102 }
103
104 NodeType::ShortSub => {
106 result.push(node.text.clone());
107 }
108
109 NodeType::Audio => {
111 }
113
114 NodeType::Mark => {
116 }
118
119 NodeType::Emphasis
121 | NodeType::Voice
122 | NodeType::Lang
123 | NodeType::Rate
124 | NodeType::Pitch
125 | NodeType::Volume
126 | NodeType::Whisper
127 | NodeType::Excited
128 | NodeType::Disappointed
129 | NodeType::Newscaster
130 | NodeType::Dj
131 | NodeType::Date
132 | NodeType::Time
133 | NodeType::Number
134 | NodeType::Ordinal
135 | NodeType::Characters
136 | NodeType::Fraction
137 | NodeType::Telephone
138 | NodeType::Unit
139 | NodeType::Address
140 | NodeType::Interjection
141 | NodeType::Expletive
142 | NodeType::Ipa
143 | NodeType::Sub => {
144 }
146
147 NodeType::Section => {
149 for child in &node.children {
150 self.format_node_recursive(child, result);
151 }
152 }
153 }
154 }
155
156 fn clean_whitespace(&self, text: &str) -> String {
157 let lines: Vec<&str> = text.lines().collect();
158 let cleaned: Vec<String> = lines
159 .iter()
160 .map(|line| line.split_whitespace().collect::<Vec<_>>().join(" "))
161 .filter(|line| !line.is_empty())
162 .collect();
163 let result = cleaned.join("\n");
164 result.trim().to_string()
165 }
166}
167
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::SpeechMarkdownParser;

    /// Parses `input` and renders it with a default-configured
    /// `TextFormatter`, panicking if either step fails.
    fn render(input: &str) -> String {
        let ast = SpeechMarkdownParser::parse(input).unwrap();
        TextFormatter::new().format(&ast).unwrap()
    }

    #[test]
    fn test_format_plain_text() {
        assert_eq!(render("Hello world"), "Hello world");
    }

    #[test]
    fn test_format_with_breaks() {
        // The break marker is replaced by whitespace, which is then collapsed.
        assert_eq!(render("Sample [2s] text"), "Sample text");
    }

    #[test]
    fn test_format_with_emphasis() {
        assert_eq!(render("++strong emphasis++"), "strong emphasis");
    }

    #[test]
    fn test_format_with_text_modifier() {
        assert_eq!(render("(text)[voice:\"Kendra\"]"), "text");
    }

    #[test]
    fn test_format_with_substitution() {
        // NOTE(review): this literal may have lost markup in transit — confirm
        // it is the intended substitution syntax.
        let input = "{Al}aluminum";
        assert_eq!(render(input), "Al");
    }

    #[test]
    fn test_format_complex_sentence() {
        let rendered = render(
            "Why do you keep switching voices (from one)[voice:\"Brian\"] to (the other)[voice:\"Kendra\"]?",
        );

        assert_eq!(
            rendered,
            "Why do you keep switching voices from one to the other?"
        );
    }

    #[test]
    fn test_format_with_audio() {
        // NOTE(review): the name suggests audio markup, but the input only
        // contains a double space — confirm the literal was not truncated.
        assert_eq!(render("Hello  world"), "Hello world");
    }
}