maddi_xml/
lib.rs

1// SPDX-FileCopyrightText: 2025 Madeline Baggins <declanbaggins@gmail.com>
2//
3// SPDX-License-Identifier: MIT
4
5use std::{
6    borrow::Cow,
7    collections::HashMap,
8    fs::File,
9    num::{IntErrorKind, ParseIntError},
10    path::{Path, PathBuf},
11    str::FromStr,
12};
13
14#[derive(Clone)]
15pub struct Parser<'a> {
16    tail: &'a str,
17    pub position: Position<'a>,
18}
19
/// A location within a source text, used to annotate errors.
#[derive(Clone, Debug)]
pub struct Position<'a> {
    /// Path reported in error messages for this source.
    pub path: &'a Path,
    /// The complete source text the position refers into.
    pub src: &'a str,
    /// Zero-based line index (number of `'\n'` characters consumed so far).
    pub line: usize,
    /// Zero-based number of characters consumed since the start of the line.
    pub char: usize,
}
27
28impl<'a> Position<'a> {
29    pub fn error(&self, message: String) -> Error<'a> {
30        Error {
31            message,
32            position: self.clone(),
33        }
34    }
35}
36
37pub type Result<'a, T> = std::result::Result<T, Error<'a>>;
38
39#[derive(Debug)]
40pub struct Error<'a> {
41    pub message: String,
42    pub position: Position<'a>,
43}
44
45impl std::fmt::Display for Error<'_> {
46    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47        const RED: &str = "\x1b[1;31m";
48        const DEFAULT: &str = "\x1b[1;39m";
49        writeln!(
50            f,
51            "{RED}Error in '{}':{DEFAULT}",
52            self.position.path.display()
53        )?;
54        for (line_num, line) in self.position.src.split('\n').enumerate() {
55            writeln!(f, "{line}")?;
56            if line_num == self.position.line {
57                let offset = std::iter::repeat_n(' ', self.position.char).collect::<String>();
58                writeln!(f, "{offset}^")?;
59                let offset_len = self.position.char.saturating_sub(self.message.len());
60                let offset = std::iter::repeat_n(' ', offset_len).collect::<String>();
61                writeln!(f, "{offset}{RED}{}{DEFAULT}", self.message)?;
62            }
63        }
64        Ok(())
65    }
66}
67
68impl<'a> Parser<'a> {
69    pub fn new(path: &'a Path, src: &'a str) -> Self {
70        Self {
71            tail: src,
72            position: Position {
73                src,
74                path,
75                line: 0,
76                char: 0,
77            },
78        }
79    }
80    pub fn parse<T: Parse<'a>>(&mut self) -> T {
81        T::parse(self)
82    }
83    fn take_whitespace(&mut self) {
84        let len = self
85            .tail
86            .find(|c: char| !c.is_whitespace())
87            .unwrap_or(self.tail.len());
88        self.take(len);
89    }
90    fn take_char(&mut self) -> Option<char> {
91        let char = self.tail.chars().next()?;
92        match char {
93            '\n' => {
94                self.position.line += 1;
95                self.position.char = 0;
96            }
97            _ => self.position.char += 1,
98        }
99        (_, self.tail) = self.tail.split_at(char.len_utf8());
100        Some(char)
101    }
102    fn take(&mut self, n: usize) -> &'a str {
103        let head;
104        (head, self.tail) = self.tail.split_at(n);
105        for c in head.chars() {
106            match c {
107                '\n' => {
108                    self.position.line += 1;
109                    self.position.char = 0;
110                }
111                _ => self.position.char += 1,
112            }
113        }
114        head
115    }
116}
117
118pub trait Parse<'a> {
119    fn parse(parser: &mut Parser<'a>) -> Self;
120}
121
122#[derive(Debug)]
123pub enum Content<'a> {
124    Element(Element<'a>),
125    Text(String),
126}
127
128impl<'a> Parse<'a> for Option<Result<'a, Content<'a>>> {
129    fn parse(parser: &mut Parser<'a>) -> Self {
130        // Clear any whitespace
131        parser.take_whitespace();
132        // If the document has finished parsing
133        if parser.tail.is_empty() {
134            return None;
135        };
136        // Check if we start with an element
137        match parser.parse::<Option<Result<Element>>>() {
138            Some(Ok(element)) => return Some(Ok(Content::Element(element))),
139            Some(Err(err)) => return Some(Err(err)),
140            None => {}
141        }
142        // Otherwise, get the text
143        let len = parser.tail.find('<').unwrap_or(parser.tail.len());
144        let text = parser.take(len);
145        Some(Ok(Content::Text(text.into())))
146    }
147}
148
149#[derive(Debug)]
150pub struct Element<'a> {
151    pub name: &'a str,
152    pub attributes: HashMap<&'a str, Attribute<'a>>,
153    pub contents: Vec<Content<'a>>,
154    pub position: Position<'a>,
155}
156
157impl<'a> Element<'a> {
158    pub fn attribute<'b, T: Query<'a, 'b>>(&'b self, name: &str) -> Result<'a, T> {
159        T::get(name, self)
160    }
161
162    pub fn children<'b, 'c, T: FromElement<'a, 'b>>(
163        &'b self,
164        name: &'c str,
165    ) -> impl Iterator<Item = Result<'a, T>> + use<'a, 'b, 'c, T> {
166        use Content;
167        self.contents
168            .iter()
169            .filter_map(move |item| match item {
170                Content::Element(e) if e.name == name => Some(e),
171                _ => None,
172            })
173            .map(|t| T::from_element(t))
174    }
175    pub fn child<'b, 'c, T: FromElement<'a, 'b>>(&'b self, name: &'c str) -> Result<'a, T> {
176        use Content;
177        let mut candidates = self.contents.iter().filter_map(move |item| match item {
178            Content::Element(e) if e.name == name => Some(e),
179            _ => None,
180        });
181        let Some(result) = candidates.next() else {
182            return Err(self.position.error(format!("expected '{name}' element")));
183        };
184        if let Some(duplicate) = candidates.next() {
185            return Err(duplicate
186                .position
187                .error(format!("duplicate '{name}' element")));
188        }
189        T::from_element(result)
190    }
191    pub fn optional_child<'b, 'c, T: FromElement<'a, 'b>>(
192        &'b self,
193        name: &'c str,
194    ) -> Result<'a, Option<T>> {
195        use Content;
196        let mut candidates = self.contents.iter().filter_map(move |item| match item {
197            Content::Element(e) if e.name == name => Some(e),
198            _ => None,
199        });
200        let Some(result) = candidates.next() else {
201            return Ok(None);
202        };
203        if let Some(duplicate) = candidates.next() {
204            return Err(duplicate
205                .position
206                .error(format!("duplicate '{name}' element")));
207        }
208        Some(T::from_element(result)).transpose()
209    }
210}
211
212impl<'a> Parse<'a> for Option<Result<'a, Element<'a>>> {
213    fn parse(parser: &mut Parser<'a>) -> Self {
214        // Find the opening tag if there is one
215        let open_tag = match parser.parse::<Option<Result<OpenTag>>>()? {
216            Ok(open_tag) => open_tag,
217            Err(err) => return Some(Err(err)),
218        };
219        // If the tag was self closing, return the entity
220        let mut contents = vec![];
221        if open_tag.self_closing {
222            return Some(Ok(Element {
223                name: open_tag.name,
224                position: open_tag.position,
225                attributes: open_tag.attributes,
226                contents,
227            }));
228        }
229        // Parse all the content
230        let close_tag = loop {
231            // Remove any whitespace
232            parser.take_whitespace();
233            // Check if there's a closing tag
234            if let Some(close_tag) = parser.parse::<Option<Result<CloseTag>>>() {
235                break close_tag;
236            }
237            // Otherwise, try to get content
238            match parser.parse::<Option<Result<Content>>>() {
239                Some(Err(err)) => return Some(Err(err)),
240                Some(Ok(content)) => contents.push(content),
241                None => {
242                    let err = format!("missing closing tag, expected: </{}>", open_tag.name);
243                    return Some(Err(parser.position.error(err)));
244                }
245            }
246        };
247        // Ensure we didn't error getting the close tag
248        let close_tag = match close_tag {
249            Ok(close_tag) => close_tag,
250            Err(err) => return Some(Err(err)),
251        };
252        // Ensure the close and open tags match
253        if open_tag.name != close_tag.name {
254            let err = format!("mismatched closing tag, expected: </{}>", open_tag.name);
255            return Some(Err(parser.position.error(err)));
256        }
257        Some(Ok(Element {
258            name: open_tag.name,
259            attributes: open_tag.attributes,
260            contents,
261            position: open_tag.position,
262        }))
263    }
264}
265
/// An element or attribute name, borrowed from the source text.
///
/// Rules enforced by the parser:
/// - The first character is a letter or an underscore.
/// - The name does not begin with "xml" in any letter case.
/// - Every character is an ASCII letter, an ASCII digit, a hyphen, an
///   underscore, or a period.
struct Name<'a>(&'a str);
272
273impl<'a> Parse<'a> for Option<Name<'a>> {
274    fn parse(parser: &mut Parser<'a>) -> Self {
275        // Ensure tail starts with a letter or underscore
276        if !parser
277            .tail
278            .starts_with(|c: char| c.is_alphabetic() || c == '_')
279        {
280            return None;
281        }
282        // Ensure tail doesn't start with 'xml' in any case
283        if parser
284            .tail
285            .get(0..3)
286            .is_some_and(|f| f.to_lowercase() == "xml")
287        {
288            return None;
289        }
290        // Find the head of the tail that only consists of
291        // digits, hyphens, underscores, and periods.
292        let len = parser
293            .tail
294            .find(|c: char| !c.is_ascii_alphanumeric() && !['.', '_', '-'].contains(&c))
295            .unwrap_or(parser.tail.len());
296        let name = parser.tail.get(..len).unwrap();
297        (!name.is_empty()).then_some(Name(parser.take(len)))
298    }
299}
300
301struct OpenTag<'a> {
302    name: &'a str,
303    attributes: HashMap<&'a str, Attribute<'a>>,
304    self_closing: bool,
305    position: Position<'a>,
306}
307
308impl<'a> Parse<'a> for Option<Result<'a, OpenTag<'a>>> {
309    fn parse(parser: &mut Parser<'a>) -> Self {
310        // Ensure we're parsing an open tag
311        if !parser.tail.starts_with('<') {
312            return None;
313        }
314        // Skip over the opening chevron
315        parser.take(1);
316        // Get the element's name
317        let Some(Name(name)) = parser.parse::<Option<Name>>() else {
318            return Some(Err(parser.position.error("expected element name".into())));
319        };
320        // Skip any whitespace
321        parser.take_whitespace();
322        // Parse any attributes
323        let mut attributes = HashMap::new();
324        while let Some(attribute) = parser.parse::<Option<Result<Attribute>>>() {
325            match attribute {
326                Ok(attribute) => {
327                    if let Some(old) = attributes.insert(attribute.name, attribute) {
328                        let duplicate = attributes.get(old.name).unwrap();
329                        return Some(Err(duplicate
330                            .position
331                            .error(format!("found duplicate '{}' attribute", old.name))));
332                    }
333                }
334                Err(e) => return Some(Err(e)),
335            }
336            parser.take_whitespace();
337        }
338        // Ensure the opening tag ends with '/>' or '>'.
339        let self_closing = parser.tail.starts_with("/>");
340        if !self_closing && !parser.tail.starts_with(">") {
341            return Some(Err(parser.position.error("expected '>' or '/>'".into())));
342        }
343        // Skip the ending bit
344        if self_closing {
345            parser.take("/>".len());
346        } else {
347            parser.take(">".len());
348        }
349        // Build the opening tag
350        Some(Ok(OpenTag {
351            name,
352            attributes,
353            self_closing,
354            position: parser.position.clone(),
355        }))
356    }
357}
358
359#[derive(Debug)]
360pub struct Attribute<'a> {
361    pub name: &'a str,
362    pub value: Option<String>,
363    pub position: Position<'a>,
364}
365
366impl<'a> Parse<'a> for Option<Result<'a, Attribute<'a>>> {
367    fn parse(parser: &mut Parser<'a>) -> Self {
368        // Clone the parser in case we need to restore it
369        let backup = parser.clone();
370        // Get the name of the attribute
371        let Some(Name(name)) = parser.parse::<Option<Name>>() else {
372            *parser = backup;
373            return None;
374        };
375        // If there's no value to the attribute, finish
376        // parsing.
377        if !parser.tail.starts_with('=') {
378            return Some(Ok(Attribute {
379                name,
380                value: None,
381                position: parser.position.clone(),
382            }));
383        }
384        // Skip the '='
385        parser.take(1);
386        // Parse the value of the attribute
387        let Some(AttributeValue(value)) = parser.parse::<Option<AttributeValue>>() else {
388            return Some(Err(parser
389                .position
390                .error("expected attribute value".into())));
391        };
392        Some(Ok(Attribute {
393            name,
394            value: Some(value),
395            position: parser.position.clone(),
396        }))
397    }
398}
399
/// The unescaped text of a quoted attribute value.
struct AttributeValue(String);
401
402impl Parse<'_> for Option<AttributeValue> {
403    fn parse(parser: &mut Parser) -> Self {
404        // Ensure the parser starts with a single or double
405        // quote.
406        let quote = match parser.tail.chars().next()? {
407            c @ ('"' | '\'') => c,
408            _ => return None,
409        };
410        // Create a working copy of the parser
411        let mut working = parser.clone();
412        working.take(1);
413        // Build out the string
414        // TODO: Add support for character entities
415        let mut value = String::new();
416        loop {
417            let next = working.take_char()?;
418            match next {
419                '\\' => match working.take_char()? {
420                    c @ ('\\' | '\'' | '"') => value.push(c),
421                    _ => return None,
422                },
423                c if c == quote => break,
424                c => value.push(c),
425            }
426        }
427        // Save the working copy of the parser
428        *parser = working;
429        Some(AttributeValue(value))
430    }
431}
432
/// The result of parsing a closing tag such as `</name>`.
struct CloseTag<'a> {
    /// The name written inside the closing tag.
    name: &'a str,
}
436
437impl<'a> Parse<'a> for Option<Result<'a, CloseTag<'a>>> {
438    fn parse(parser: &mut Parser<'a>) -> Self {
439        // Ensure we're at the start of a closing tag
440        if !parser.tail.starts_with("</") {
441            return None;
442        }
443        parser.take("</".len());
444        // Get the name of the closing tag
445        let Some(Name(name)) = parser.parse::<Option<Name>>() else {
446            return Some(Err(parser.position.error("expected element name".into())));
447        };
448        // Ensure we end with a '>'.
449        if !parser.tail.starts_with('>') {
450            return Some(Err(parser.position.error("expected '>'".into())));
451        }
452        // Skip the '>'.
453        parser.take(">".len());
454        Some(Ok(CloseTag { name }))
455    }
456}
457
458pub trait FromValue<'a, 'b>: Sized {
459    fn from_value(value: &'b str, position: &'b Position<'a>) -> Result<'a, Self>;
460}
461
462impl<'a, 'b> FromValue<'a, 'b> for Cow<'b, str> {
463    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
464        Ok(Cow::Borrowed(value))
465    }
466}
467
468impl<'a, 'b> FromValue<'a, 'b> for &'b str {
469    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
470        Ok(value)
471    }
472}
473
474impl<'a, 'b> FromValue<'a, 'b> for &'b Path {
475    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
476        Ok(value.as_ref())
477    }
478}
479
480impl<'a, 'b> FromValue<'a, 'b> for String {
481    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
482        Ok(value.into())
483    }
484}
485
486impl<'a, 'b> FromValue<'a, 'b> for PathBuf {
487    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
488        Ok(PathBuf::from(value))
489    }
490}
491
/// Marker for the primitive integer types, all of which parse from text
/// with [`ParseIntError`] as their error type.
pub trait FromNumeric: FromStr<Err = ParseIntError> {}

/// Implements [`FromNumeric`] for every listed type.
macro_rules! impl_from_numeric {
    ($($int:ty),* $(,)?) => {
        $(impl FromNumeric for $int {})*
    };
}

impl_from_numeric!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize);
517
518impl<'a, 'b, T> FromValue<'a, 'b> for T
519where
520    T: FromNumeric,
521{
522    fn from_value(value: &'b str, position: &'b Position<'a>) -> Result<'a, Self> {
523        value.parse::<T>().map_err(|e| {
524            let msg = match e.kind() {
525                IntErrorKind::Empty => "failed to parse integer from empty string",
526                IntErrorKind::InvalidDigit => "value contains invalid digit",
527                IntErrorKind::PosOverflow => "value too large for this attribute",
528                IntErrorKind::NegOverflow => "value too small for this attribute",
529                IntErrorKind::Zero => "value cannot be zero for this attribute",
530                _ => "unknown integer parse error",
531            }
532            .into();
533            position.error(msg)
534        })
535    }
536}
537
538pub trait FromAttribute<'a, 'b>: Sized {
539    fn from_attribute(attribute: &'b Attribute<'a>) -> Result<'a, Self>;
540}
541
542impl<'a, 'b, T: FromValue<'a, 'b>> FromAttribute<'a, 'b> for T {
543    fn from_attribute(attribute: &'b Attribute<'a>) -> Result<'a, Self> {
544        let Some(value) = attribute.value.as_ref() else {
545            let name = attribute.name;
546            return Err(attribute
547                .position
548                .error(format!("expected non-empty value for '{name}'")));
549        };
550        T::from_value(value, &attribute.position)
551    }
552}
553
554pub trait Query<'a, 'b>: Sized {
555    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self>;
556}
557
558impl<'a, 'b, T: FromAttribute<'a, 'b>> Query<'a, 'b> for T {
559    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self> {
560        let Some(attribute) = element.attributes.get(name) else {
561            let msg = format!("expected '{name}' attribute");
562            return Err(element.position.error(msg));
563        };
564        T::from_attribute(attribute)
565    }
566}
567
568impl<'a, 'b, T: FromAttribute<'a, 'b>> Query<'a, 'b> for Option<T> {
569    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self> {
570        element
571            .attributes
572            .get(name)
573            .map(|a| T::from_attribute(a))
574            .transpose()
575    }
576}
577
578impl<'a, 'b> Query<'a, 'b> for bool {
579    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self> {
580        Ok(element.attributes.contains_key(name))
581    }
582}
583
584pub trait FromElement<'a, 'b>: Sized {
585    fn from_element(element: &'b Element<'a>) -> Result<'a, Self>;
586}
587
588impl<'a, 'b> FromElement<'a, 'b> for &'b Element<'a> {
589    fn from_element(element: &'b Element<'a>) -> Result<'a, Self> {
590        Ok(element)
591    }
592}
593
594impl<'a, 'b, T> FromElement<'a, 'b> for T
595where
596    T: FromValue<'a, 'b>,
597{
598    fn from_element(element: &'b Element<'a>) -> Result<'a, Self> {
599        match element.contents.as_slice() {
600            [Content::Text(value)] => T::from_value(value, &element.position),
601            _ => Err(element
602                .position
603                .error("expected element to contain a single value".into())),
604        }
605    }
606}
607
608pub trait FromConfig
609where
610    Self: for<'a, 'b> FromElement<'a, 'b>,
611{
612    const ROOT: &'static str;
613    fn load(path: &Path) -> std::result::Result<Self, String> {
614        use std::io::Read;
615
616        let mut file =
617            File::open(path).map_err(|_| format!("could not open config: {}", path.display()))?;
618        let mut config = String::new();
619        file.read_to_string(&mut config)
620            .map_err(|_| format!("could not read config file: {}", path.display()))?;
621        let mut parser = Parser::new(path, &config);
622        let err = format!("expected root '<{}>' element", Self::ROOT);
623        let element = match parser.parse::<Option<Result<Content>>>() {
624            Some(Ok(Content::Element(e))) => {
625                if e.name != Self::ROOT {
626                    return Err(format!("{}", e.position.error(err)));
627                }
628                e
629            }
630            Some(Err(e)) => return Err(format!("{e}")),
631            _ => return Err(format!("{}", parser.position.error(err))),
632        };
633        Self::from_element(&element).map_err(|e| format!("{e}"))
634    }
635}