maddi_xml/
lib.rs

1// SPDX-FileCopyrightText: 2025 Madeline Baggins <declanbaggins@gmail.com>
2//
3// SPDX-License-Identifier: MIT
4
5use std::{
6    borrow::Cow,
7    collections::HashMap,
8    fs::File,
9    num::{IntErrorKind, ParseIntError},
10    path::{Path, PathBuf},
11    str::FromStr,
12};
13
14#[derive(Clone)]
15pub struct Parser<'a> {
16    tail: &'a str,
17    pub position: Position<'a>,
18}
19
20#[derive(Debug, Clone)]
21pub struct Position<'a> {
22    pub path: &'a Path,
23    pub src: &'a str,
24    pub line: usize,
25    pub char: usize,
26}
27
28impl<'a> Position<'a> {
29    pub fn error(&self, message: String) -> Error<'a> {
30        Error {
31            message,
32            position: self.clone(),
33        }
34    }
35}
36
37pub type Result<'a, T> = std::result::Result<T, Error<'a>>;
38
39#[derive(Debug)]
40pub struct Error<'a> {
41    pub message: String,
42    pub position: Position<'a>,
43}
44
45impl std::fmt::Display for Error<'_> {
46    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47        const RED: &str = "\x1b[1;31m";
48        const DEFAULT: &str = "\x1b[1;39m";
49        writeln!(
50            f,
51            "{RED}Error in '{}':{DEFAULT}",
52            self.position.path.display()
53        )?;
54        for (line_num, line) in self.position.src.split('\n').enumerate() {
55            writeln!(f, "{line}")?;
56            if line_num == self.position.line {
57                let offset = std::iter::repeat_n(' ', self.position.char).collect::<String>();
58                writeln!(f, "{offset}^")?;
59                let offset_len = self.position.char.saturating_sub(self.message.len());
60                let offset = std::iter::repeat_n(' ', offset_len).collect::<String>();
61                writeln!(f, "{offset}{RED}{}{DEFAULT}", self.message)?;
62            }
63        }
64        Ok(())
65    }
66}
67
68impl<'a> Parser<'a> {
69    pub fn new(path: &'a Path, src: &'a str) -> Self {
70        Self {
71            tail: src,
72            position: Position {
73                src,
74                path,
75                line: 0,
76                char: 0,
77            },
78        }
79    }
80    pub fn parse<T: Parse<'a>>(&mut self) -> T {
81        T::parse(self)
82    }
83    fn take_whitespace(&mut self) {
84        let len = self
85            .tail
86            .find(|c: char| !c.is_whitespace())
87            .unwrap_or(self.tail.len());
88        self.take(len);
89    }
90    fn take_char(&mut self) -> Option<char> {
91        let char = self.tail.chars().next()?;
92        match char {
93            '\n' => {
94                self.position.line += 1;
95                self.position.char = 0;
96            }
97            _ => self.position.char += 1,
98        }
99        (_, self.tail) = self.tail.split_at(char.len_utf8());
100        Some(char)
101    }
102    fn take(&mut self, n: usize) -> &'a str {
103        let head;
104        (head, self.tail) = self.tail.split_at(n);
105        for c in head.chars() {
106            match c {
107                '\n' => {
108                    self.position.line += 1;
109                    self.position.char = 0;
110                }
111                _ => self.position.char += 1,
112            }
113        }
114        head
115    }
116}
117
118pub trait Parse<'a> {
119    fn parse(parser: &mut Parser<'a>) -> Self;
120}
121
122#[derive(Debug)]
123pub enum Content<'a> {
124    Element(Element<'a>),
125    Text(String),
126}
127
128impl<'a> Parse<'a> for Option<Result<'a, Content<'a>>> {
129    fn parse(parser: &mut Parser<'a>) -> Self {
130        // Clear any whitespace
131        parser.take_whitespace();
132        // If the document has finished parsing
133        if parser.tail.is_empty() {
134            return None;
135        };
136        // Check if we start with an element
137        match parser.parse::<Option<Result<Element>>>() {
138            Some(Ok(element)) => return Some(Ok(Content::Element(element))),
139            Some(Err(err)) => return Some(Err(err)),
140            None => {}
141        }
142        // Otherwise, get the text
143        let len = parser.tail.find('<').unwrap_or(parser.tail.len());
144        let text = parser.take(len);
145        Some(Ok(Content::Text(text.into())))
146    }
147}
148
149#[derive(Debug)]
150pub struct Element<'a> {
151    pub name: &'a str,
152    pub attributes: HashMap<&'a str, Attribute<'a>>,
153    pub contents: Vec<Content<'a>>,
154    pub position: Position<'a>,
155}
156
157impl<'a> Element<'a> {
158    pub fn attribute<'b, T: Query<'a, 'b>>(&'b self, name: &str) -> Result<'a, T> {
159        T::get(name, self)
160    }
161
162    pub fn children<'b, 'c, T: FromElement<'a, 'b>>(
163        &'b self,
164        name: &'c str,
165    ) -> impl Iterator<Item = Result<'a, T>> + use<'a, 'b, 'c, T> {
166        use Content;
167        self.contents
168            .iter()
169            .filter_map(move |item| match item {
170                Content::Element(e) if e.name == name => Some(e),
171                _ => None,
172            })
173            .map(|t| T::from_element(t))
174    }
175    pub fn child<'b, 'c, T: FromElement<'a, 'b>>(&'b self, name: &'c str) -> Result<'a, T> {
176        use Content;
177        let mut candidates = self.contents.iter().filter_map(move |item| match item {
178            Content::Element(e) if e.name == name => Some(e),
179            _ => None,
180        });
181        let Some(result) = candidates.next() else {
182            return Err(self.position.error(format!("expected '{name}' element")));
183        };
184        if let Some(duplicate) = candidates.next() {
185            return Err(duplicate
186                .position
187                .error(format!("duplicate '{name}' element")));
188        }
189        T::from_element(result)
190    }
191    pub fn optional_child<'b, 'c, T: FromElement<'a, 'b>>(
192        &'b self,
193        name: &'c str,
194    ) -> Result<'a, Option<T>> {
195        use Content;
196        let mut candidates = self.contents.iter().filter_map(move |item| match item {
197            Content::Element(e) if e.name == name => Some(e),
198            _ => None,
199        });
200        let Some(result) = candidates.next() else {
201            return Ok(None);
202        };
203        if let Some(duplicate) = candidates.next() {
204            return Err(duplicate
205                .position
206                .error(format!("duplicate '{name}' element")));
207        }
208        Some(T::from_element(result)).transpose()
209    }
210}
211
212impl<'a> Parse<'a> for Option<Result<'a, Element<'a>>> {
213    fn parse(parser: &mut Parser<'a>) -> Self {
214        // Find the opening tag if there is one
215        let open_tag = match parser.parse::<Option<Result<OpenTag>>>()? {
216            Ok(open_tag) => open_tag,
217            Err(err) => return Some(Err(err)),
218        };
219        // If the tag was self closing, return the entity
220        let mut contents = vec![];
221        if open_tag.self_closing {
222            return Some(Ok(Element {
223                name: open_tag.name,
224                position: open_tag.position,
225                attributes: open_tag.attributes,
226                contents,
227            }));
228        }
229        // Parse all the content
230        let close_tag = loop {
231            // Remove any whitespace
232            parser.take_whitespace();
233            // Check if there's a closing tag
234            if let Some(close_tag) = parser.parse::<Option<Result<CloseTag>>>() {
235                break close_tag;
236            }
237            // Otherwise, try to get content
238            match parser.parse::<Option<Result<Content>>>() {
239                Some(Err(err)) => return Some(Err(err)),
240                Some(Ok(content)) => contents.push(content),
241                None => return Some(Err(parser.position.error("missing closing tag".into()))),
242            }
243        };
244        // Ensure we didn't error getting the close tag
245        let close_tag = match close_tag {
246            Ok(close_tag) => close_tag,
247            Err(err) => return Some(Err(err)),
248        };
249        // Ensure the close and open tags match
250        if open_tag.name != close_tag.name {
251            return Some(Err(parser.position.error("mismatched closing tag".into())));
252        }
253        Some(Ok(Element {
254            name: open_tag.name,
255            attributes: open_tag.attributes,
256            contents,
257            position: open_tag.position,
258        }))
259    }
260}
261
262/// The name of an element.
263/// - Must start with a letter or underscore.
264/// - Cannot start with the letters "xml" in any case.
265/// - Consists only of letters, digits, hyphens,
266///   underscores, and periods.
267struct Name<'a>(&'a str);
268
269impl<'a> Parse<'a> for Option<Name<'a>> {
270    fn parse(parser: &mut Parser<'a>) -> Self {
271        // Ensure tail starts with a letter or underscore
272        if !parser
273            .tail
274            .starts_with(|c: char| c.is_alphabetic() || c == '_')
275        {
276            return None;
277        }
278        // Ensure tail doesn't start with 'xml' in any case
279        if parser
280            .tail
281            .get(0..3)
282            .is_some_and(|f| f.to_lowercase() == "xml")
283        {
284            return None;
285        }
286        // Find the head of the tail that only consists of
287        // digits, hyphens, underscores, and periods.
288        let len = parser
289            .tail
290            .find(|c: char| !c.is_ascii_alphanumeric() && !['.', '_', '-'].contains(&c))
291            .unwrap_or(parser.tail.len());
292        let name = parser.tail.get(..len).unwrap();
293        (!name.is_empty()).then_some(Name(parser.take(len)))
294    }
295}
296
297struct OpenTag<'a> {
298    name: &'a str,
299    attributes: HashMap<&'a str, Attribute<'a>>,
300    self_closing: bool,
301    position: Position<'a>,
302}
303
304impl<'a> Parse<'a> for Option<Result<'a, OpenTag<'a>>> {
305    fn parse(parser: &mut Parser<'a>) -> Self {
306        // Ensure we're parsing an open tag
307        if !parser.tail.starts_with('<') {
308            return None;
309        }
310        // Skip over the opening chevron
311        parser.take(1);
312        // Get the element's name
313        let Some(Name(name)) = parser.parse::<Option<Name>>() else {
314            return Some(Err(parser.position.error("expected element name".into())));
315        };
316        // Skip any whitespace
317        parser.take_whitespace();
318        // Parse any attributes
319        let mut attributes = HashMap::new();
320        while let Some(attribute) = parser.parse::<Option<Result<Attribute>>>() {
321            match attribute {
322                Ok(attribute) => {
323                    if let Some(old) = attributes.insert(attribute.name, attribute) {
324                        let duplicate = attributes.get(old.name).unwrap();
325                        return Some(Err(duplicate
326                            .position
327                            .error(format!("found duplicate '{}' attribute", old.name))));
328                    }
329                }
330                Err(e) => return Some(Err(e)),
331            }
332            parser.take_whitespace();
333        }
334        // Ensure the opening tag ends with '/>' or '>'.
335        let self_closing = parser.tail.starts_with("/>");
336        if !self_closing && !parser.tail.starts_with(">") {
337            return Some(Err(parser.position.error("expected '>' or '/>'".into())));
338        }
339        // Skip the ending bit
340        if self_closing {
341            parser.take("/>".len());
342        } else {
343            parser.take(">".len());
344        }
345        // Build the opening tag
346        Some(Ok(OpenTag {
347            name,
348            attributes,
349            self_closing,
350            position: parser.position.clone(),
351        }))
352    }
353}
354
355#[derive(Debug)]
356pub struct Attribute<'a> {
357    pub name: &'a str,
358    pub value: Option<String>,
359    pub position: Position<'a>,
360}
361
362impl<'a> Parse<'a> for Option<Result<'a, Attribute<'a>>> {
363    fn parse(parser: &mut Parser<'a>) -> Self {
364        // Clone the parser in case we need to restore it
365        let backup = parser.clone();
366        // Get the name of the attribute
367        let Some(Name(name)) = parser.parse::<Option<Name>>() else {
368            *parser = backup;
369            return None;
370        };
371        // If there's no value to the attribute, finish
372        // parsing.
373        if !parser.tail.starts_with('=') {
374            return Some(Ok(Attribute {
375                name,
376                value: None,
377                position: parser.position.clone(),
378            }));
379        }
380        // Skip the '='
381        parser.take(1);
382        // Parse the value of the attribute
383        let Some(AttributeValue(value)) = parser.parse::<Option<AttributeValue>>() else {
384            return Some(Err(parser
385                .position
386                .error("expected attribute value".into())));
387        };
388        Some(Ok(Attribute {
389            name,
390            value: Some(value),
391            position: parser.position.clone(),
392        }))
393    }
394}
395
396struct AttributeValue(String);
397
398impl Parse<'_> for Option<AttributeValue> {
399    fn parse(parser: &mut Parser) -> Self {
400        // Ensure the parser starts with a single or double
401        // quote.
402        let quote = match parser.tail.chars().next()? {
403            c @ ('"' | '\'') => c,
404            _ => return None,
405        };
406        // Create a working copy of the parser
407        let mut working = parser.clone();
408        working.take(1);
409        // Build out the string
410        // TODO: Add support for character entities
411        let mut value = String::new();
412        loop {
413            let next = working.take_char()?;
414            match next {
415                '\\' => match working.take_char()? {
416                    c @ ('\\' | '\'' | '"') => value.push(c),
417                    _ => return None,
418                },
419                c if c == quote => break,
420                c => value.push(c),
421            }
422        }
423        // Save the working copy of the parser
424        *parser = working;
425        Some(AttributeValue(value))
426    }
427}
428
429struct CloseTag<'a> {
430    name: &'a str,
431}
432
433impl<'a> Parse<'a> for Option<Result<'a, CloseTag<'a>>> {
434    fn parse(parser: &mut Parser<'a>) -> Self {
435        // Ensure we're at the start of a closing tag
436        if !parser.tail.starts_with("</") {
437            return None;
438        }
439        parser.take("</".len());
440        // Get the name of the closing tag
441        let Some(Name(name)) = parser.parse::<Option<Name>>() else {
442            return Some(Err(parser.position.error("expected element name".into())));
443        };
444        // Ensure we end with a '>'.
445        if !parser.tail.starts_with('>') {
446            return Some(Err(parser.position.error("expected '>'".into())));
447        }
448        // Skip the '>'.
449        parser.take(">".len());
450        Some(Ok(CloseTag { name }))
451    }
452}
453
454pub trait FromValue<'a, 'b>: Sized {
455    fn from_value(value: &'b str, position: &'b Position<'a>) -> Result<'a, Self>;
456}
457
458impl<'a, 'b> FromValue<'a, 'b> for Cow<'b, str> {
459    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
460        Ok(Cow::Borrowed(value))
461    }
462}
463
464impl<'a, 'b> FromValue<'a, 'b> for &'b str {
465    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
466        Ok(value)
467    }
468}
469
470impl<'a, 'b> FromValue<'a, 'b> for &'b Path {
471    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
472        Ok(value.as_ref())
473    }
474}
475
476impl<'a, 'b> FromValue<'a, 'b> for String {
477    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
478        Ok(value.into())
479    }
480}
481
482impl<'a, 'b> FromValue<'a, 'b> for PathBuf {
483    fn from_value(value: &'b str, _position: &'b Position<'a>) -> Result<'a, Self> {
484        Ok(PathBuf::from(value))
485    }
486}
487
/// Marker for integer types whose `FromStr` implementation reports
/// failures as a [`ParseIntError`].
pub trait FromNumeric: FromStr<Err = ParseIntError> {}

/// Implements [`FromNumeric`] for each listed type.
macro_rules! impl_from_numeric {
    ($($int:ty),* $(,)?) => {
        $(impl FromNumeric for $int {})*
    };
}

impl_from_numeric!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize);
513
514impl<'a, 'b, T> FromValue<'a, 'b> for T
515where
516    T: FromNumeric,
517{
518    fn from_value(value: &'b str, position: &'b Position<'a>) -> Result<'a, Self> {
519        value.parse::<T>().map_err(|e| {
520            let msg = match e.kind() {
521                IntErrorKind::Empty => "failed to parse integer from empty string",
522                IntErrorKind::InvalidDigit => "value contains invalid digit",
523                IntErrorKind::PosOverflow => "value too large for this attribute",
524                IntErrorKind::NegOverflow => "value too small for this attribute",
525                IntErrorKind::Zero => "value cannot be zero for this attribute",
526                _ => "unknown integer parse error",
527            }
528            .into();
529            position.error(msg)
530        })
531    }
532}
533
534pub trait FromAttribute<'a, 'b>: Sized {
535    fn from_attribute(attribute: &'b Attribute<'a>) -> Result<'a, Self>;
536}
537
538impl<'a, 'b, T: FromValue<'a, 'b>> FromAttribute<'a, 'b> for T {
539    fn from_attribute(attribute: &'b Attribute<'a>) -> Result<'a, Self> {
540        let Some(value) = attribute.value.as_ref() else {
541            let name = attribute.name;
542            return Err(attribute
543                .position
544                .error(format!("expected non-empty value for '{name}'")));
545        };
546        T::from_value(value, &attribute.position)
547    }
548}
549
550pub trait Query<'a, 'b>: Sized {
551    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self>;
552}
553
554impl<'a, 'b, T: FromAttribute<'a, 'b>> Query<'a, 'b> for T {
555    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self> {
556        let Some(attribute) = element.attributes.get(name) else {
557            let msg = format!("expected '{name}' attribute");
558            return Err(element.position.error(msg));
559        };
560        T::from_attribute(attribute)
561    }
562}
563
564impl<'a, 'b, T: FromAttribute<'a, 'b>> Query<'a, 'b> for Option<T> {
565    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self> {
566        element
567            .attributes
568            .get(name)
569            .map(|a| T::from_attribute(a))
570            .transpose()
571    }
572}
573
574impl<'a, 'b> Query<'a, 'b> for bool {
575    fn get(name: &str, element: &'b Element<'a>) -> Result<'a, Self> {
576        Ok(element.attributes.contains_key(name))
577    }
578}
579
580pub trait FromElement<'a, 'b>: Sized {
581    fn from_element(element: &'b Element<'a>) -> Result<'a, Self>;
582}
583
584impl<'a, 'b> FromElement<'a, 'b> for &'b Element<'a> {
585    fn from_element(element: &'b Element<'a>) -> Result<'a, Self> {
586        Ok(element)
587    }
588}
589
590impl<'a, 'b, T> FromElement<'a, 'b> for T
591where
592    T: FromValue<'a, 'b>,
593{
594    fn from_element(element: &'b Element<'a>) -> Result<'a, Self> {
595        match element.contents.as_slice() {
596            [Content::Text(value)] => T::from_value(value, &element.position),
597            _ => Err(element
598                .position
599                .error("expected element to contain a single value".into())),
600        }
601    }
602}
603
604pub trait FromConfig
605where
606    Self: for<'a, 'b> FromElement<'a, 'b>,
607{
608    const ROOT: &'static str;
609    fn load(path: &Path) -> std::result::Result<Self, String> {
610        use std::io::Read;
611
612        let mut file =
613            File::open(path).map_err(|_| format!("could not open config: {}", path.display()))?;
614        let mut config = String::new();
615        file.read_to_string(&mut config)
616            .map_err(|_| format!("could not read config file: {}", path.display()))?;
617        let mut parser = Parser::new(path, &config);
618        let err = format!("expected root '<{}>' element", Self::ROOT);
619        let element = match parser.parse::<Option<Result<Content>>>() {
620            Some(Ok(Content::Element(e))) => {
621                if e.name != Self::ROOT {
622                    return Err(format!("{}", e.position.error(err)));
623                }
624                e
625            }
626            _ => return Err(format!("{}", parser.position.error(err))),
627        };
628        Self::from_element(&element).map_err(|e| format!("{e}"))
629    }
630}