maddi_xml/
lib.rs

1// SPDX-FileCopyrightText: 2025 Madeline Baggins <declanbaggins@gmail.com>
2//
3// SPDX-License-Identifier: MIT
4
5use std::{
6    borrow::Cow, collections::HashMap, num::{IntErrorKind, ParseIntError}, path::{Path, PathBuf}, str::FromStr
7};
8
/// A cursor over XML source text that tracks its location in the document.
///
/// Cloning a parser snapshots its state; the parsing code in this file uses
/// that to back out of a construct it has only partially consumed.
#[derive(Clone)]
pub struct Parser<'a> {
    /// The part of the source that has not been consumed yet.
    tail: &'a str,
    /// Location of the next unconsumed character.
    pub position: Position<'a>,
}
14
/// A location within a source document, used to anchor error reports.
#[derive(Debug, Clone)]
pub struct Position<'a> {
    /// Path of the document; shown in the header of a rendered error.
    pub path: &'a Path,
    /// The complete source text of the document.
    pub src: &'a str,
    /// Zero-based line index.
    pub line: usize,
    /// Zero-based number of characters consumed on the current line.
    pub char: usize,
}
22
impl<'a> Position<'a> {
    /// Builds an [`Error`] carrying `message` and a copy of this position.
    pub fn error(&self, message: String) -> Error<'a> {
        Error {
            message,
            position: self.clone(),
        }
    }
}
31
/// Result alias whose error type is the position-aware [`Error`] defined in
/// this file.
pub type Result<'a, T> = std::result::Result<T, Error<'a>>;
33
/// A failure message tied to the place in the source where it was reported.
///
/// The `Display` implementation prints the source text and marks the
/// recorded line and column.
#[derive(Debug)]
pub struct Error<'a> {
    /// Human-readable description of the problem.
    pub message: String,
    /// Where in the source the problem was reported.
    pub position: Position<'a>,
}
39
40impl std::fmt::Display for Error<'_> {
41    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42        const RED: &str = "\x1b[1;31m";
43        const DEFAULT: &str = "\x1b[1;39m";
44        writeln!(
45            f,
46            "{RED}Error in '{}':{DEFAULT}",
47            self.position.path.display()
48        )?;
49        for (line_num, line) in self.position.src.split('\n').enumerate() {
50            writeln!(f, "{line}")?;
51            if line_num == self.position.line {
52                let offset = std::iter::repeat_n(' ', self.position.char).collect::<String>();
53                writeln!(f, "{offset}^")?;
54                let offset_len = self.position.char.saturating_sub(self.message.len());
55                let offset = std::iter::repeat_n(' ', offset_len).collect::<String>();
56                writeln!(f, "{offset}{RED}{}{DEFAULT}", self.message)?;
57            }
58        }
59        Ok(())
60    }
61}
62
63impl<'a> Parser<'a> {
64    pub fn new(path: &'a Path, src: &'a str) -> Self {
65        Self {
66            tail: src,
67            position: Position {
68                src,
69                path,
70                line: 0,
71                char: 0,
72            },
73        }
74    }
75    pub fn parse<T: Parse<'a>>(&mut self) -> T {
76        T::parse(self)
77    }
78    fn take_whitespace(&mut self) {
79        let len = self
80            .tail
81            .find(|c: char| !c.is_whitespace())
82            .unwrap_or(self.tail.len());
83        self.take(len);
84    }
85    fn take_char(&mut self) -> Option<char> {
86        let char = self.tail.chars().next()?;
87        match char {
88            '\n' => {
89                self.position.line += 1;
90                self.position.char = 0;
91            }
92            _ => self.position.char += 1,
93        }
94        (_, self.tail) = self.tail.split_at(char.len_utf8());
95        Some(char)
96    }
97    fn take(&mut self, n: usize) -> &'a str {
98        let head;
99        (head, self.tail) = self.tail.split_at(n);
100        for c in head.chars() {
101            match c {
102                '\n' => {
103                    self.position.line += 1;
104                    self.position.char = 0;
105                }
106                _ => self.position.char += 1,
107            }
108        }
109        head
110    }
111}
112
/// Types that can be read from the front of a [`Parser`].
///
/// `parse` returns `Self` directly; the implementations in this file express
/// "not present" and "malformed" by implementing the trait for `Option<_>`
/// and `Option<Result<_>>` wrappers.
pub trait Parse<'a> {
    /// Reads a value from `parser`, consuming the input it recognises.
    fn parse(parser: &mut Parser<'a>) -> Self;
}
116
/// One item of a document or element body: a nested element or a run of
/// text.
#[derive(Debug)]
pub enum Content<'a> {
    /// A nested element.
    Element(Element<'a>),
    /// Text up to the next `<`, with leading whitespace removed.
    Text(String),
}
122
123impl<'a> Parse<'a> for Option<Result<'a, Content<'a>>> {
124    fn parse(parser: &mut Parser<'a>) -> Self {
125        // Clear any whitespace
126        parser.take_whitespace();
127        // If the document has finished parsing
128        if parser.tail.is_empty() {
129            return None;
130        };
131        // Check if we start with an element
132        match parser.parse::<Option<Result<Element>>>() {
133            Some(Ok(element)) => return Some(Ok(Content::Element(element))),
134            Some(Err(err)) => return Some(Err(err)),
135            None => {}
136        }
137        // Otherwise, get the text
138        let len = parser.tail.find('<').unwrap_or(parser.tail.len());
139        let text = parser.take(len);
140        Some(Ok(Content::Text(text.into())))
141    }
142}
143
/// A parsed element: its tag name, attributes and body.
#[derive(Debug)]
pub struct Element<'a> {
    /// Tag name, borrowed from the source text.
    pub name: &'a str,
    /// Attributes of the opening tag, keyed by attribute name.
    pub attributes: HashMap<&'a str, Attribute<'a>>,
    /// Nested elements and text in document order; empty for a
    /// self-closing tag.
    pub contents: Vec<Content<'a>>,
    /// Parser position recorded immediately after the opening tag.
    pub position: Position<'a>,
}
151
152impl<'a> Element<'a> {
153    pub fn attribute<'b, T: Query<'a, 'b>>(&'b self, name: &str) -> Result<'a, T> {
154        T::get(name, self)
155    }
156
157    pub fn children<'b, 'c, T: FromElement<'a, 'b>>(
158        &'b self,
159        name: &'c str,
160    ) -> impl Iterator<Item = Result<'a, T>> + use<'a, 'b, 'c, T> {
161        use Content;
162        self.contents
163            .iter()
164            .filter_map(move |item| match item {
165                Content::Element(e) if e.name == name => Some(e),
166                _ => None,
167            })
168            .map(|t| T::from_element(t))
169    }
170    pub fn child<'b, 'c, T: FromElement<'a, 'b>>(&'b self, name: &'c str) -> Result<'a, T> {
171        use Content;
172        let mut candidates = self.contents.iter().filter_map(move |item| match item {
173            Content::Element(e) if e.name == name => Some(e),
174            _ => None,
175        });
176        let Some(result) = candidates.next() else {
177            return Err(self.position.error(format!("expected '{name}' element")));
178        };
179        if let Some(duplicate) = candidates.next() {
180            return Err(duplicate
181                .position
182                .error(format!("duplicate '{name}' element")));
183        }
184        T::from_element(result)
185    }
186    pub fn optional_child<'b, 'c, T: FromElement<'a, 'b>>(
187        &'b self,
188        name: &'c str,
189    ) -> Result<'a, Option<T>> {
190        use Content;
191        let mut candidates = self.contents.iter().filter_map(move |item| match item {
192            Content::Element(e) if e.name == name => Some(e),
193            _ => None,
194        });
195        let Some(result) = candidates.next() else {
196            return Ok(None);
197        };
198        if let Some(duplicate) = candidates.next() {
199            return Err(duplicate
200                .position
201                .error(format!("duplicate '{name}' element")));
202        }
203        Some(T::from_element(result)).transpose()
204    }
205}
206
207impl<'a> Parse<'a> for Option<Result<'a, Element<'a>>> {
208    fn parse(parser: &mut Parser<'a>) -> Self {
209        // Find the opening tag if there is one
210        let open_tag = match parser.parse::<Option<Result<OpenTag>>>()? {
211            Ok(open_tag) => open_tag,
212            Err(err) => return Some(Err(err)),
213        };
214        // If the tag was self closing, return the entity
215        let mut contents = vec![];
216        if open_tag.self_closing {
217            return Some(Ok(Element {
218                name: open_tag.name,
219                position: open_tag.position,
220                attributes: open_tag.attributes,
221                contents,
222            }));
223        }
224        // Parse all the content
225        let close_tag = loop {
226            // Remove any whitespace
227            parser.take_whitespace();
228            // Check if there's a closing tag
229            if let Some(close_tag) = parser.parse::<Option<Result<CloseTag>>>() {
230                break close_tag;
231            }
232            // Otherwise, try to get content
233            match parser.parse::<Option<Result<Content>>>() {
234                Some(Err(err)) => return Some(Err(err)),
235                Some(Ok(content)) => contents.push(content),
236                None => return Some(Err(parser.position.error("missing closing tag".into()))),
237            }
238        };
239        // Ensure we didn't error getting the close tag
240        let close_tag = match close_tag {
241            Ok(close_tag) => close_tag,
242            Err(err) => return Some(Err(err)),
243        };
244        // Ensure the close and open tags match
245        if open_tag.name != close_tag.name {
246            return Some(Err(parser.position.error("mismatched closing tag".into())));
247        }
248        Some(Ok(Element {
249            name: open_tag.name,
250            attributes: open_tag.attributes,
251            contents,
252            position: open_tag.position,
253        }))
254    }
255}
256
/// The name of an element or attribute.
/// - Must start with an ASCII letter or underscore.
/// - Cannot start with the letters "xml" in any case.
/// - Consists only of ASCII letters, ASCII digits, hyphens,
///   underscores, and periods.
struct Name<'a>(&'a str);
263
264impl<'a> Parse<'a> for Option<Name<'a>> {
265    fn parse(parser: &mut Parser<'a>) -> Self {
266        // Ensure tail starts with a letter or underscore
267        if !parser
268            .tail
269            .starts_with(|c: char| c.is_alphabetic() || c == '_')
270        {
271            return None;
272        }
273        // Ensure tail doesn't start with 'xml' in any case
274        if parser
275            .tail
276            .get(0..3)
277            .is_some_and(|f| f.to_lowercase() == "xml")
278        {
279            return None;
280        }
281        // Find the head of the tail that only consists of
282        // digits, hyphens, underscores, and periods.
283        let len = parser
284            .tail
285            .find(|c: char| !c.is_ascii_alphanumeric() && !['.', '_', '-'].contains(&c))
286            .unwrap_or(parser.tail.len());
287        let name = parser.tail.get(..len).unwrap();
288        (!name.is_empty()).then_some(Name(parser.take(len)))
289    }
290}
291
/// The opening tag of an element, e.g. `<name attr="value">` or `<name/>`.
struct OpenTag<'a> {
    /// Tag name.
    name: &'a str,
    /// Attributes keyed by attribute name.
    attributes: HashMap<&'a str, Attribute<'a>>,
    /// Whether the tag ended with `/>`.
    self_closing: bool,
    /// Parser position immediately after the tag.
    position: Position<'a>,
}
298
299impl<'a> Parse<'a> for Option<Result<'a, OpenTag<'a>>> {
300    fn parse(parser: &mut Parser<'a>) -> Self {
301        // Ensure we're parsing an open tag
302        if !parser.tail.starts_with('<') {
303            return None;
304        }
305        // Skip over the opening chevron
306        parser.take(1);
307        // Get the element's name
308        let Some(Name(name)) = parser.parse::<Option<Name>>() else {
309            return Some(Err(parser.position.error("expected element name".into())));
310        };
311        // Skip any whitespace
312        parser.take_whitespace();
313        // Parse any attributes
314        let mut attributes = HashMap::new();
315        while let Some(attribute) = parser.parse::<Option<Result<Attribute>>>() {
316            match attribute {
317                Ok(attribute) => {
318                    if let Some(old) = attributes.insert(attribute.name, attribute) {
319                        let duplicate = attributes.get(old.name).unwrap();
320                        return Some(Err(duplicate
321                            .position
322                            .error(format!("found duplicate '{}' attribute", old.name))));
323                    }
324                }
325                Err(e) => return Some(Err(e)),
326            }
327            parser.take_whitespace();
328        }
329        // Ensure the opening tag ends with '/>' or '>'.
330        let self_closing = parser.tail.starts_with("/>");
331        if !self_closing && !parser.tail.starts_with(">") {
332            return Some(Err(parser.position.error("expected '>' or '/>'".into())));
333        }
334        // Skip the ending bit
335        if self_closing {
336            parser.take("/>".len());
337        } else {
338            parser.take(">".len());
339        }
340        // Build the opening tag
341        Some(Ok(OpenTag {
342            name,
343            attributes,
344            self_closing,
345            position: parser.position.clone(),
346        }))
347    }
348}
349
/// An attribute from an opening tag.
#[derive(Debug)]
pub struct Attribute<'a> {
    /// Attribute name, borrowed from the source text.
    pub name: &'a str,
    /// The unescaped value, or `None` when the attribute was written
    /// without an `=`.
    pub value: Option<String>,
    /// Parser position immediately after the attribute.
    pub position: Position<'a>,
}
356
357impl<'a> Parse<'a> for Option<Result<'a, Attribute<'a>>> {
358    fn parse(parser: &mut Parser<'a>) -> Self {
359        // Clone the parser in case we need to restore it
360        let backup = parser.clone();
361        // Get the name of the attribute
362        let Some(Name(name)) = parser.parse::<Option<Name>>() else {
363            *parser = backup;
364            return None;
365        };
366        // If there's no value to the attribute, finish
367        // parsing.
368        if !parser.tail.starts_with('=') {
369            return Some(Ok(Attribute {
370                name,
371                value: None,
372                position: parser.position.clone(),
373            }));
374        }
375        // Skip the '='
376        parser.take(1);
377        // Parse the value of the attribute
378        let Some(AttributeValue(value)) = parser.parse::<Option<AttributeValue>>() else {
379            return Some(Err(parser
380                .position
381                .error("expected attribute value".into())));
382        };
383        Some(Ok(Attribute {
384            name,
385            value: Some(value),
386            position: parser.position.clone(),
387        }))
388    }
389}
390
/// The unescaped text of a quoted attribute value.
struct AttributeValue(String);
392
393impl Parse<'_> for Option<AttributeValue> {
394    fn parse(parser: &mut Parser) -> Self {
395        // Ensure the parser starts with a single or double
396        // quote.
397        let quote = match parser.tail.chars().next()? {
398            c @ ('"' | '\'') => c,
399            _ => return None,
400        };
401        // Create a working copy of the parser
402        let mut working = parser.clone();
403        working.take(1);
404        // Build out the string
405        // TODO: Add support for character entities
406        let mut value = String::new();
407        loop {
408            let next = working.take_char()?;
409            match next {
410                '\\' => match working.take_char()? {
411                    c @ ('\\' | '\'' | '"') => value.push(c),
412                    _ => return None,
413                },
414                c if c == quote => break,
415                c => value.push(c),
416            }
417        }
418        // Save the working copy of the parser
419        *parser = working;
420        Some(AttributeValue(value))
421    }
422}
423
/// A closing tag such as `</name>`.
struct CloseTag<'a> {
    /// The name written in the closing tag.
    name: &'a str,
}
427
428impl<'a> Parse<'a> for Option<Result<'a, CloseTag<'a>>> {
429    fn parse(parser: &mut Parser<'a>) -> Self {
430        // Ensure we're at the start of a closing tag
431        if !parser.tail.starts_with("</") {
432            return None;
433        }
434        parser.take("</".len());
435        // Get the name of the closing tag
436        let Some(Name(name)) = parser.parse::<Option<Name>>() else {
437            return Some(Err(parser.position.error("expected element name".into())));
438        };
439        // Ensure we end with a '>'.
440        if !parser.tail.starts_with('>') {
441            return Some(Err(parser.position.error("expected '>'".into())));
442        }
443        // Skip the '>'.
444        parser.take(">".len());
445        Some(Ok(CloseTag { name }))
446    }
447}
448
/// Conversion from the text of an attribute value or an element body.
///
/// `'a` is the lifetime of the source document that errors refer to; `'b` is
/// the lifetime of the borrowed text being converted.
pub trait FromValue<'a, 'b>: Sized {
    /// Converts `value`; `position` is the location an implementation can
    /// use when it needs to report a conversion error.
    fn from_value(value: &'b str, position: &'b Position<'a>) -> Result<'a, Self>;
}
452
/// Borrows the text as a `Cow::Borrowed`; never fails.
impl<'a, 'b> FromValue<'a, 'b> for Cow<'b, str> {
    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
        Ok(Cow::Borrowed(value))
    }
}
458
/// Returns the text unchanged, without copying; never fails.
impl<'a, 'b> FromValue<'a, 'b> for &'b str {
    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
        Ok(value)
    }
}
464
/// Views the text as a borrowed path, without copying; never fails.
impl<'a, 'b> FromValue<'a, 'b> for &'b Path {
    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
        Ok(value.as_ref())
    }
}
470
/// Copies the text into an owned `String`; never fails.
impl<'a, 'b> FromValue<'a, 'b> for String {
    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
        Ok(value.into())
    }
}
476
/// Copies the text into an owned `PathBuf`; never fails.
impl<'a, 'b> FromValue<'a, 'b> for PathBuf {
    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
        Ok(PathBuf::from(value))
    }
}
482
/// Marker for integer types that are parsed from text with `FromStr` and
/// report failures as `ParseIntError`.
///
/// It is implemented for the primitive integers and for their `NonZero*`
/// counterparts; the latter are the types whose parser can report
/// `IntErrorKind::Zero`.
pub trait FromNumeric: FromStr<Err = ParseIntError> {}

/// Implements [`FromNumeric`] for every listed type.
macro_rules! impl_from_numeric {
    ($($ty:ty),* $(,)?) => {
        $(impl FromNumeric for $ty {})*
    };
}

impl_from_numeric!(u8, u16, u32, u64, u128, usize);
impl_from_numeric!(i8, i16, i32, i64, i128, isize);
impl_from_numeric!(
    std::num::NonZeroU8,
    std::num::NonZeroU16,
    std::num::NonZeroU32,
    std::num::NonZeroU64,
    std::num::NonZeroU128,
    std::num::NonZeroUsize,
);
impl_from_numeric!(
    std::num::NonZeroI8,
    std::num::NonZeroI16,
    std::num::NonZeroI32,
    std::num::NonZeroI64,
    std::num::NonZeroI128,
    std::num::NonZeroIsize,
);
508
509impl<'a, 'b, T> FromValue<'a, 'b> for T
510where
511    T: FromNumeric,
512{
513    fn from_value(value: &'b str, position: &'b Position<'a>) -> Result<'a, Self> {
514        value.parse::<T>().map_err(|e| {
515            let msg = match e.kind() {
516                IntErrorKind::Empty => "failed to parse integer from empty string",
517                IntErrorKind::InvalidDigit => "value contains invalid digit",
518                IntErrorKind::PosOverflow => "value too large for this attribute",
519                IntErrorKind::NegOverflow => "value too small for this attribute",
520                IntErrorKind::Zero => "value cannot be zero for this attribute",
521                _ => "unknown integer parse error",
522            }
523            .into();
524            position.error(msg)
525        })
526    }
527}
528
/// Conversion from a parsed [`Attribute`].
pub trait FromAttribute<'a, 'b>: Sized {
    /// Converts `attribute`, or returns an error describing why it cannot
    /// be converted.
    fn from_attribute(attribute: &'b Attribute<'a>) -> Result<'a, Self>;
}
532
533impl<'a, 'b, T: FromValue<'a, 'b>> FromAttribute<'a, 'b> for T {
534    fn from_attribute(attribute: &'b Attribute<'a>) -> Result<'a, Self> {
535        let Some(value) = attribute.value.as_ref() else {
536            let name = attribute.name;
537            return Err(attribute
538                .position
539                .error(format!("expected non-empty value for '{name}'")));
540        };
541        T::from_value(value, &attribute.position)
542    }
543}
544
/// A way of looking up the attribute `name` on an element; this is the
/// bound used by `Element::attribute`.
pub trait Query<'a, 'b>: Sized {
    /// Reads the attribute called `name` from `element`.
    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self>;
}
548
549impl<'a, 'b, T: FromAttribute<'a, 'b>> Query<'a, 'b> for T {
550    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self> {
551        let Some(attribute) = element.attributes.get(name) else {
552            let msg = format!("expected '{name}' attribute");
553            return Err(element.position.error(msg));
554        };
555        T::from_attribute(attribute)
556    }
557}
558
559impl<'a, 'b, T: FromAttribute<'a, 'b>> Query<'a, 'b> for Option<T> {
560    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self> {
561        element
562            .attributes
563            .get(name)
564            .map(|a| T::from_attribute(a))
565            .transpose()
566    }
567}
568
/// Flag attribute: `true` when the attribute is present, whatever its value;
/// never fails.
impl<'a, 'b> Query<'a, 'b> for bool {
    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self> {
        Ok(element.attributes.contains_key(name))
    }
}
574
/// Conversion from a parsed [`Element`]; this is the bound used by
/// `Element::child`, `Element::children` and `Element::optional_child`.
pub trait FromElement<'a, 'b>: Sized {
    /// Converts `element`, or returns an error describing why it cannot be
    /// converted.
    fn from_element(element: &'b Element<'a>) -> Result<'a, Self>;
}
578
/// Identity conversion: hands back a reference to the element itself; never
/// fails.
impl<'a, 'b> FromElement<'a, 'b> for &'b Element<'a> {
    fn from_element(element: &'b Element<'a>) -> Result<'a, Self> {
        Ok(element)
    }
}
584
585impl<'a, 'b, T> FromElement<'a, 'b> for T
586where
587    T: FromValue<'a, 'b>,
588{
589    fn from_element(element: &'b Element<'a>) -> Result<'a, Self> {
590        match element.contents.as_slice() {
591            [Content::Text(value)] => T::from_value(value, &element.position),
592            _ => Err(element
593                .position
594                .error("expected element to contain a single value".into())),
595        }
596    }
597}