Skip to main content

sim_codec/implementation/
domain_form.rs

//! A generic `#(...)` domain-form parser and formatter.
//!
//! Several domain codecs (`music-shapes`, `sound-shapes`, ...) hand-rolled the
//! same `#(Name key=value value [list,...])` reader. This is that grammar, with
//! no domain knowledge:
//!
//! ```text
//! form  = "#(" name item* ")"
//! item  = key "=" value | value
//! value = form | "[" (value ("," value)* ","?)? "]" | string | atom
//! ```
//!
//! A list may be empty and may end with a trailing comma. The syntax itself is
//! ASCII; the text of atoms and strings is passed through unchanged. A domain
//! crate parses with [`parse_domain_form`], reads fields with
//! [`DomainForm::atom`]/[`string`](DomainForm::string)/
//! [`list`](DomainForm::list), and (optionally) renders with
//! [`format_domain_form`].
17
18/// A parsed domain-form value.
19#[derive(Clone, Debug, PartialEq)]
20pub enum DomainValue {
21    /// A nested `#(...)` form.
22    Form(DomainForm),
23    /// A `[...]` list.
24    List(Vec<DomainValue>),
25    /// A `"..."` string.
26    String(String),
27    /// A bare atom (number, identifier, `4/4`, ...).
28    Atom(String),
29}
30
31/// A parsed `#(name ...)` form: a name, keyed fields, and positional values.
32#[derive(Clone, Debug, PartialEq)]
33pub struct DomainForm {
34    /// The form name.
35    pub name: String,
36    /// Keyed `key=value` fields, in order.
37    pub fields: Vec<(String, DomainValue)>,
38    /// Positional (un-keyed) values, in order.
39    pub positional: Vec<DomainValue>,
40}
41
42impl DomainForm {
43    /// The value of keyed field `key`, if present.
44    pub fn field(&self, key: &str) -> Option<&DomainValue> {
45        self.fields
46            .iter()
47            .find_map(|(name, value)| (name == key).then_some(value))
48    }
49
50    /// The atom string of keyed field `key`.
51    pub fn atom(&self, key: &str) -> Result<&str, DomainFormError> {
52        match self.field(key) {
53            Some(DomainValue::Atom(value)) => Ok(value),
54            Some(_) => Err(DomainFormError::WrongFieldKind(key.to_owned())),
55            None => Err(DomainFormError::MissingField(key.to_owned())),
56        }
57    }
58
59    /// The string of keyed field `key`.
60    pub fn string(&self, key: &str) -> Result<&str, DomainFormError> {
61        match self.field(key) {
62            Some(DomainValue::String(value)) => Ok(value),
63            Some(_) => Err(DomainFormError::WrongFieldKind(key.to_owned())),
64            None => Err(DomainFormError::MissingField(key.to_owned())),
65        }
66    }
67
68    /// The list items of keyed field `key`.
69    pub fn list(&self, key: &str) -> Result<&[DomainValue], DomainFormError> {
70        match self.field(key) {
71            Some(DomainValue::List(items)) => Ok(items),
72            Some(_) => Err(DomainFormError::WrongFieldKind(key.to_owned())),
73            None => Err(DomainFormError::MissingField(key.to_owned())),
74        }
75    }
76
77    /// The nested form of keyed field `key`.
78    pub fn form(&self, key: &str) -> Result<&DomainForm, DomainFormError> {
79        match self.field(key) {
80            Some(DomainValue::Form(value)) => Ok(value),
81            Some(_) => Err(DomainFormError::WrongFieldKind(key.to_owned())),
82            None => Err(DomainFormError::MissingField(key.to_owned())),
83        }
84    }
85}
86
/// A domain-form parse or access error.
///
/// `ExpectedForm` through `TrailingInput` are reported while parsing text;
/// `MissingField` and `WrongFieldKind` are reported by the typed field
/// accessors and carry the key that was asked for.
///
/// The type implements [`std::fmt::Display`] and [`std::error::Error`], so it
/// can be printed for users and propagated with `?` into `Box<dyn Error>`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DomainFormError {
    /// Input did not start with `#(`.
    ExpectedForm,
    /// Input ended mid-form.
    UnexpectedEof,
    /// An invalid character was found where a token was expected.
    InvalidToken,
    /// A form repeated a field key.
    DuplicateField(String),
    /// Extra input followed the top-level form.
    TrailingInput,
    /// A required field was missing.
    MissingField(String),
    /// A field had the wrong value kind.
    WrongFieldKind(String),
}

impl std::fmt::Display for DomainFormError {
    /// Write a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ExpectedForm => f.write_str("expected input to start with `#(`"),
            Self::UnexpectedEof => f.write_str("unexpected end of input inside a form"),
            Self::InvalidToken => f.write_str("invalid character where a token was expected"),
            Self::DuplicateField(key) => write!(f, "duplicate field `{key}`"),
            Self::TrailingInput => f.write_str("unexpected input after the top-level form"),
            Self::MissingField(key) => write!(f, "missing required field `{key}`"),
            Self::WrongFieldKind(key) => write!(f, "field `{key}` has the wrong kind of value"),
        }
    }
}

impl std::error::Error for DomainFormError {}
105
106/// Parse a top-level `#(...)` domain form.
107///
108/// # Examples
109///
110/// ```
111/// use sim_codec::{parse_domain_form, DomainValue};
112///
113/// let form = parse_domain_form("#(Note dur=4/4 60 64)").unwrap();
114/// assert_eq!(form.name, "Note");
115/// assert_eq!(form.atom("dur").unwrap(), "4/4");
116/// assert_eq!(
117///     form.positional,
118///     vec![DomainValue::Atom("60".into()), DomainValue::Atom("64".into())],
119/// );
120/// ```
121pub fn parse_domain_form(input: &str) -> Result<DomainForm, DomainFormError> {
122    let mut parser = Parser { input, index: 0 };
123    parser.skip_ws();
124    if !parser.consume_str("#(") {
125        return Err(DomainFormError::ExpectedForm);
126    }
127    let form = parser.parse_form_body()?;
128    parser.skip_ws();
129    if parser.index != parser.input.len() {
130        return Err(DomainFormError::TrailingInput);
131    }
132    Ok(form)
133}
134
135/// Render a domain form as an ASCII `#(...)` string. Round-trips through
136/// [`parse_domain_form`].
137///
138/// # Examples
139///
140/// ```
141/// use sim_codec::{format_domain_form, parse_domain_form};
142///
143/// let source = "#(Note dur=4/4 pitches=[60,64])";
144/// let form = parse_domain_form(source).unwrap();
145/// let rendered = format_domain_form(&form);
146/// assert_eq!(parse_domain_form(&rendered).unwrap(), form);
147/// ```
148pub fn format_domain_form(form: &DomainForm) -> String {
149    let mut out = String::from("#(");
150    out.push_str(&form.name);
151    for value in &form.positional {
152        out.push(' ');
153        format_value(value, &mut out);
154    }
155    for (key, value) in &form.fields {
156        out.push(' ');
157        out.push_str(key);
158        out.push('=');
159        format_value(value, &mut out);
160    }
161    out.push(')');
162    out
163}
164
165fn format_value(value: &DomainValue, out: &mut String) {
166    match value {
167        DomainValue::Form(form) => out.push_str(&format_domain_form(form)),
168        DomainValue::List(items) => {
169            out.push('[');
170            for (index, item) in items.iter().enumerate() {
171                if index > 0 {
172                    out.push(',');
173                }
174                format_value(item, out);
175            }
176            out.push(']');
177        }
178        DomainValue::String(text) => {
179            out.push('"');
180            for ch in text.chars() {
181                if ch == '\\' || ch == '"' {
182                    out.push('\\');
183                }
184                out.push(ch);
185            }
186            out.push('"');
187        }
188        DomainValue::Atom(text) => out.push_str(text),
189    }
190}
191
/// A cursor over the source text of a domain form.
struct Parser<'src> {
    /// The complete text being parsed.
    input: &'src str,
    /// Byte offset into `input` of the next unread character.
    index: usize,
}
196
197impl Parser<'_> {
198    fn parse_form_body(&mut self) -> Result<DomainForm, DomainFormError> {
199        let name = self.parse_ident()?;
200        let mut fields: Vec<(String, DomainValue)> = Vec::new();
201        let mut positional = Vec::new();
202        loop {
203            self.skip_ws();
204            if self.consume_char(')') {
205                break;
206            }
207            match self.peek_char() {
208                Some('#') | Some('[') | Some('"') => positional.push(self.parse_value()?),
209                _ => {
210                    let atom = self.parse_atom()?;
211                    if self.consume_char('=') {
212                        if fields.iter().any(|(key, _)| key == &atom) {
213                            return Err(DomainFormError::DuplicateField(atom));
214                        }
215                        fields.push((atom, self.parse_value()?));
216                    } else {
217                        positional.push(DomainValue::Atom(atom));
218                    }
219                }
220            }
221        }
222        Ok(DomainForm {
223            name,
224            fields,
225            positional,
226        })
227    }
228
229    fn parse_value(&mut self) -> Result<DomainValue, DomainFormError> {
230        self.skip_ws();
231        if self.consume_str("#(") {
232            return self.parse_form_body().map(DomainValue::Form);
233        }
234        if self.consume_char('[') {
235            return self.parse_list();
236        }
237        if self.peek_char() == Some('"') {
238            return self.parse_string().map(DomainValue::String);
239        }
240        self.parse_atom().map(DomainValue::Atom)
241    }
242
243    fn parse_list(&mut self) -> Result<DomainValue, DomainFormError> {
244        let mut items = Vec::new();
245        loop {
246            self.skip_ws();
247            if self.consume_char(']') {
248                break;
249            }
250            items.push(self.parse_value()?);
251            self.skip_ws();
252            if self.consume_char(',') {
253                continue;
254            }
255            self.expect_char(']')?;
256            break;
257        }
258        Ok(DomainValue::List(items))
259    }
260
261    fn parse_string(&mut self) -> Result<String, DomainFormError> {
262        self.expect_char('"')?;
263        let mut out = String::new();
264        while let Some(ch) = self.next_char() {
265            match ch {
266                '"' => return Ok(out),
267                '\\' => out.push(self.next_char().ok_or(DomainFormError::UnexpectedEof)?),
268                other => out.push(other),
269            }
270        }
271        Err(DomainFormError::UnexpectedEof)
272    }
273
274    fn parse_atom(&mut self) -> Result<String, DomainFormError> {
275        let start = self.index;
276        while let Some(ch) = self.peek_char() {
277            if ch.is_whitespace() || [',', ')', ']', '='].contains(&ch) {
278                break;
279            }
280            self.index += ch.len_utf8();
281        }
282        if self.index == start {
283            return Err(DomainFormError::UnexpectedEof);
284        }
285        Ok(self.input[start..self.index].to_owned())
286    }
287
288    fn parse_ident(&mut self) -> Result<String, DomainFormError> {
289        let atom = self.parse_atom()?;
290        if atom
291            .chars()
292            .all(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '-')
293        {
294            Ok(atom)
295        } else {
296            Err(DomainFormError::InvalidToken)
297        }
298    }
299
300    fn expect_char(&mut self, expected: char) -> Result<(), DomainFormError> {
301        match self.next_char() {
302            Some(ch) if ch == expected => Ok(()),
303            Some(_) => Err(DomainFormError::InvalidToken),
304            None => Err(DomainFormError::UnexpectedEof),
305        }
306    }
307
308    fn consume_char(&mut self, expected: char) -> bool {
309        if self.peek_char() == Some(expected) {
310            self.index += expected.len_utf8();
311            true
312        } else {
313            false
314        }
315    }
316
317    fn consume_str(&mut self, expected: &str) -> bool {
318        if self.input[self.index..].starts_with(expected) {
319            self.index += expected.len();
320            true
321        } else {
322            false
323        }
324    }
325
326    fn skip_ws(&mut self) {
327        while let Some(ch) = self.peek_char() {
328            if ch.is_whitespace() {
329                self.index += ch.len_utf8();
330            } else {
331                break;
332            }
333        }
334    }
335
336    fn peek_char(&self) -> Option<char> {
337        self.input[self.index..].chars().next()
338    }
339
340    fn next_char(&mut self) -> Option<char> {
341        let ch = self.peek_char()?;
342        self.index += ch.len_utf8();
343        Some(ch)
344    }
345}
346
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an atom value in expectations.
    fn atom(text: &str) -> DomainValue {
        DomainValue::Atom(text.to_owned())
    }

    /// Keyed atoms, a list and a nested form are all reachable through the
    /// typed accessors.
    #[test]
    fn parses_keyed_form_with_list_and_nested_form() {
        let source = "#(Note dur=4/4 pitch=C4 tags=[a,b] inner=#(Rest dur=1/4))";
        let form = parse_domain_form(source).expect("parse");

        assert_eq!(form.name, "Note");
        assert_eq!(form.atom("dur"), Ok("4/4"));
        assert_eq!(form.atom("pitch"), Ok("C4"));
        assert_eq!(form.list("tags").unwrap(), &[atom("a"), atom("b")]);
        assert_eq!(form.form("inner").unwrap().name, "Rest");
    }

    /// Items without a key land in `positional`, not in `fields`.
    #[test]
    fn parses_positional_values() {
        let form = parse_domain_form("#(Chord 60 64 67)").expect("parse");

        assert!(form.fields.is_empty());
        assert_eq!(form.positional.len(), 3);
    }

    /// Rendering a parsed form and parsing it again yields an equal form.
    #[test]
    fn round_trips_through_format() {
        let original = parse_domain_form("#(Note dur=4/4 sym=\"a\\\"b\" pitches=[60,64])")
            .expect("parse");

        let reparsed = parse_domain_form(&format_domain_form(&original)).unwrap();

        assert_eq!(reparsed, original);
    }

    /// Input that stops before the closing `)` reports `UnexpectedEof`.
    #[test]
    fn missing_close_paren_is_unexpected_eof() {
        let outcome = parse_domain_form("#(Note dur=4/4");

        assert_eq!(outcome, Err(DomainFormError::UnexpectedEof));
    }

    /// Repeating a key reports `DuplicateField` with that key.
    #[test]
    fn duplicate_field_is_rejected() {
        let outcome = parse_domain_form("#(Note dur=4/4 dur=1/2)");

        assert_eq!(
            outcome,
            Err(DomainFormError::DuplicateField(String::from("dur")))
        );
    }

    /// Non-whitespace text after the form reports `TrailingInput`.
    #[test]
    fn trailing_input_is_rejected() {
        let outcome = parse_domain_form("#(Note dur=4/4) extra");

        assert_eq!(outcome, Err(DomainFormError::TrailingInput));
    }

    /// Input that does not open with `#(` reports `ExpectedForm`.
    #[test]
    fn non_form_input_is_rejected() {
        let outcome = parse_domain_form("Note");

        assert_eq!(outcome, Err(DomainFormError::ExpectedForm));
    }

    /// Backslash-escaped quotes inside a string are unescaped by the parser.
    #[test]
    fn escaped_quotes_round_trip() {
        let form = parse_domain_form("#(S v=\"he said \\\"hi\\\"\")").expect("parse");

        assert_eq!(form.string("v"), Ok("he said \"hi\""));
    }
}