// speechmarkdown_rust/ast.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4/// Abstract Syntax Tree node for SpeechMarkdown
5#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
6pub struct AstNode {
7    /// Type of the AST node
8    pub node_type: NodeType,
9
10    /// Text content of the node
11    pub text: String,
12
13    /// Child nodes
14    pub children: Vec<AstNode>,
15
16    /// Position in the source text (if available)
17    pub position: Option<Position>,
18
19    /// Additional attributes (modifier values, etc.)
20    pub attributes: HashMap<String, String>,
21
22    /// Ordered attribute keys (preserves insertion order)
23    pub attribute_keys: Vec<String>,
24}
25
26impl AstNode {
27    /// Create a new AST node
28    pub fn new(node_type: NodeType, text: impl Into<String>) -> Self {
29        Self {
30            node_type,
31            text: text.into(),
32            children: Vec::new(),
33            position: None,
34            attributes: HashMap::new(),
35            attribute_keys: Vec::new(),
36        }
37    }
38
39    /// Add a child node
40    pub fn add_child(mut self, child: AstNode) -> Self {
41        self.children.push(child);
42        self
43    }
44
45    /// Set position
46    pub fn with_position(mut self, position: Position) -> Self {
47        self.position = Some(position);
48        self
49    }
50
51    /// Add an attribute
52    pub fn with_attribute(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
53        let key = key.into();
54        if !self.attributes.contains_key(&key) {
55            self.attribute_keys.push(key.clone());
56        }
57        self.attributes.insert(key, value.into());
58        self
59    }
60
61    /// Create a document node
62    pub fn document() -> Self {
63        Self::new(NodeType::Document, "")
64    }
65
66    /// Create a plain text node
67    pub fn text(text: impl Into<String>) -> Self {
68        Self::new(NodeType::PlainText, text)
69    }
70}
71
72/// Position in source text
73#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
74pub struct Position {
75    /// Start byte offset
76    pub start: usize,
77
78    /// End byte offset
79    pub end: usize,
80
81    /// Line number (1-indexed)
82    pub line: usize,
83
84    /// Column number (1-indexed)
85    pub column: usize,
86}
87
88impl Position {
89    pub fn new(start: usize, end: usize, line: usize, column: usize) -> Self {
90        Self {
91            start,
92            end,
93            line,
94            column,
95        }
96    }
97}
98
99/// Types of AST nodes in SpeechMarkdown
100#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
101pub enum NodeType {
102    // Structural nodes
103    /// Root document node
104    Document,
105
106    /// Paragraph node
107    Paragraph,
108
109    /// Simple line (no modifiers)
110    SimpleLine,
111
112    /// Empty line
113    EmptyLine,
114
115    /// Section with modifiers
116    Section,
117
118    // Content nodes
119    /// Plain text content
120    PlainText,
121
122    /// Text with special characters
123    PlainTextSpecialChars,
124
125    /// Text in emphasis context
126    PlainTextEmphasis,
127
128    // Markup nodes
129    /// Short break notation [time]
130    ShortBreak,
131
132    /// Extended break with strength
133    Break,
134
135    /// Moderate emphasis +text+
136    ShortEmphasisModerate,
137
138    /// Strong emphasis ++text++
139    ShortEmphasisStrong,
140
141    /// No emphasis ~text~
142    ShortEmphasisNone,
143
144    /// Reduced emphasis -text-
145    ShortEmphasisReduced,
146
147    /// Text modifier (text)[key:value]
148    TextModifier,
149
150    /// Short IPA notation (/text/phoneme/)
151    ShortIpa,
152
153    /// Bare IPA notation /phoneme/
154    BareIpa,
155
156    /// Short substitution {text}alias
157    ShortSub,
158
159    /// Audio element ![caption](url)
160    Audio,
161
162    /// Mark tag
163    Mark,
164
165    // Modifier types (for text modifiers and sections)
166    /// Emphasis modifier
167    Emphasis,
168
169    /// Voice modifier
170    Voice,
171
172    /// Language modifier
173    Lang,
174
175    /// Rate modifier
176    Rate,
177
178    /// Pitch modifier
179    Pitch,
180
181    /// Volume modifier
182    Volume,
183
184    /// Whisper modifier
185    Whisper,
186
187    /// Excited modifier
188    Excited,
189
190    /// Disappointed modifier
191    Disappointed,
192
193    /// Newscaster modifier
194    Newscaster,
195
196    /// DJ modifier
197    Dj,
198
199    /// Date modifier
200    Date,
201
202    /// Time modifier
203    Time,
204
205    /// Number modifier
206    Number,
207
208    /// Ordinal modifier
209    Ordinal,
210
211    /// Characters modifier
212    Characters,
213
214    /// Fraction modifier
215    Fraction,
216
217    /// Telephone modifier
218    Telephone,
219
220    /// Unit modifier
221    Unit,
222
223    /// Address modifier
224    Address,
225
226    /// Interjection modifier
227    Interjection,
228
229    /// Expletive/Bleep modifier
230    Expletive,
231
232    /// IPA modifier
233    Ipa,
234
235    /// Substitution modifier
236    Sub,
237}
238
239impl NodeType {
240    /// Check if this node type represents emphasis
241    pub fn is_emphasis(&self) -> bool {
242        matches!(
243            self,
244            NodeType::ShortEmphasisModerate
245                | NodeType::ShortEmphasisStrong
246                | NodeType::ShortEmphasisNone
247                | NodeType::ShortEmphasisReduced
248                | NodeType::Emphasis
249        )
250    }
251
252    /// Check if this node type represents a break/pause
253    pub fn is_break(&self) -> bool {
254        matches!(self, NodeType::ShortBreak | NodeType::Break)
255    }
256
257    /// Check if this node type represents a modifier
258    pub fn is_modifier(&self) -> bool {
259        matches!(
260            self,
261            NodeType::Emphasis
262                | NodeType::Voice
263                | NodeType::Lang
264                | NodeType::Rate
265                | NodeType::Pitch
266                | NodeType::Volume
267                | NodeType::Whisper
268                | NodeType::Excited
269                | NodeType::Disappointed
270                | NodeType::Newscaster
271                | NodeType::Dj
272                | NodeType::Date
273                | NodeType::Time
274                | NodeType::Number
275                | NodeType::Ordinal
276                | NodeType::Characters
277                | NodeType::Fraction
278                | NodeType::Telephone
279                | NodeType::Unit
280                | NodeType::Address
281                | NodeType::Interjection
282                | NodeType::Expletive
283                | NodeType::Ipa
284                | NodeType::Sub
285        )
286    }
287}