glycin-core 4.0.0-alpha

Sandboxed image decoding
use std::fmt::Display;
use std::path::Path;

/// A list of rules parsed from a comma-separated identifier string.
///
/// Every rule in the list has to hold for [`Identifier::matches`] to
/// return `true`.
#[derive(Debug, Clone)]
pub struct Identifier(Vec<Rule>);

/// Error carrying a human-readable description of why parsing failed.
///
/// The wrapped message is exactly what the [`Display`] implementation prints.
#[derive(Debug, Clone)]
pub struct ParseError(String);

impl Display for ParseError {
    /// Writes the stored message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(message) = self;
        write!(f, "{message}")
    }
}

impl std::error::Error for ParseError {}

impl Identifier {
    pub fn complexity(&self) -> usize {
        self.0.len()
    }

    pub fn parse(s: &str) -> Result<Self, ParseError> {
        let ids = s.split(',');
        let mut parsed_ids = Vec::new();
        for (n, id) in ids.enumerate() {
            match Rule::parse(id) {
                Ok(parsed_id) => parsed_ids.push(parsed_id),
                Err(err) => {
                    return Err(ParseError(format!(
                        "Error in rule {n} of '{s}': {err}",
                        n = n + 1
                    )));
                }
            }
        }

        Ok(Self(parsed_ids))
    }

    pub fn matches(&self, path: Option<&Path>, head: &[u8]) -> bool {
        for id in &self.0 {
            match id {
                Rule::Ext(ext) => {
                    if let Some(path_ext) =
                        path.and_then(|x| x.extension()).and_then(|x| x.to_str())
                    {
                        if path_ext != ext {
                            return false;
                        }
                    } else {
                        return false;
                    }
                }
                Rule::MagicBytes(mb) => {
                    if !head.get(mb.pos..).is_some_and(|s| s.starts_with(&mb.bytes)) {
                        return false;
                    }
                }
            }
        }

        true
    }
}

/// A single condition of an identifier.
#[derive(Debug, Clone)]
enum Rule {
    /// Bytes expected at a fixed offset.
    MagicBytes(MagicBytes),
    /// Expected file extension.
    Ext(String),
}

impl Rule {
    /// Parses one rule of the form `ext:<extension>` or
    /// `<byte position>:<bytes>`.
    ///
    /// The `ext:` prefix is checked first. Any other input is split at its
    /// first `:` and handed to the magic-bytes parser.
    ///
    /// # Errors
    ///
    /// Returns a message if the rule has neither an `ext:` prefix nor a `:`,
    /// or if the magic-bytes part fails to parse.
    fn parse(rule: &str) -> Result<Self, String> {
        if let Some(ext) = rule.strip_prefix("ext:") {
            return Ok(Self::Ext(ext.to_owned()));
        }

        let Some((pos, bytes)) = rule.split_once(':') else {
            return Err(format!(
                "This identifier has no known prefix: {rule}. Must be 'ext:' or '<byte position>:'."
            ));
        };

        MagicBytes::parse(pos, bytes).map(Self::MagicBytes)
    }
}

/// A byte sequence expected at a fixed offset.
#[derive(Debug, Clone)]
struct MagicBytes {
    /// Offset at which `bytes` has to start.
    pos: usize,
    /// The expected bytes.
    bytes: Vec<u8>,
}

impl MagicBytes {
    /// Parses an offset and an escaped byte string.
    ///
    /// `pos` is parsed as a `usize`. In `sbytes`, ASCII characters stand for
    /// themselves, `\\` is a single backslash, and `\xHH` is the byte given by
    /// the two hex digits `HH`.
    ///
    /// # Errors
    ///
    /// Returns a message if
    /// * `pos` is not a valid `usize`,
    /// * `sbytes` contains a non-ASCII character outside an escape,
    /// * an escape other than `\\` or `\x` is used,
    /// * `\x` is not followed by exactly two hex digits, or
    /// * `sbytes` ends in the middle of an escape sequence.
    fn parse(pos: &str, sbytes: &str) -> Result<Self, String> {
        enum State {
            /// Not inside an escape sequence.
            None,
            /// A `\` has been read.
            Backslash,
            /// `\x` has been read; the first hex digit is expected next.
            Hex1,
            /// `\x` and the first hex digit have been read.
            Hex2(char),
        }

        let pos = pos
            .parse::<usize>()
            .map_err(|err| format!("Failed to parse position: {err}"))?;

        let mut state = State::None;
        let mut bytes = Vec::new();

        for c in sbytes.chars() {
            state = match state {
                State::None => match c {
                    '\\' => State::Backslash,
                    c if c.is_ascii() => {
                        // Lossless: ASCII characters always fit into one byte.
                        bytes.push(c as u8);
                        State::None
                    }
                    c => return Err(format!("Not an ascii char: {c}")),
                },
                State::Backslash => match c {
                    '\\' => {
                        bytes.push(b'\\');
                        State::None
                    }
                    'x' => State::Hex1,
                    c => return Err(format!("Escape sequence \\{c} not supported")),
                },
                State::Hex1 => State::Hex2(c),
                State::Hex2(c1) => {
                    let s = format!("{c1}{c}");
                    // `from_str_radix` tolerates a leading `+`, which would let
                    // `\x+1` slip through as 0x01. Require two real hex digits.
                    if !s.chars().all(|digit| digit.is_ascii_hexdigit()) {
                        return Err(format!(
                            "Invalid hex number '{s}': invalid digit found in string"
                        ));
                    }
                    bytes.push(
                        u8::from_str_radix(&s, 16)
                            .map_err(|err| format!("Invalid hex number '{s}': {err}"))?,
                    );
                    State::None
                }
            };
        }

        // Input that stops in the middle of an escape is malformed; report it
        // instead of silently dropping the dangling characters.
        let dangling = match state {
            State::None => return Ok(Self { pos, bytes }),
            State::Backslash => String::from("\\"),
            State::Hex1 => String::from("\\x"),
            State::Hex2(c1) => format!("\\x{c1}"),
        };

        Err(format!(
            "Incomplete escape sequence '{dangling}' at end of input"
        ))
    }
}

// Unit tests for rule parsing and identifier matching.
#[cfg(test)]
mod tests {
    use crate::config::indentifier::{Identifier, Rule};

    // A magic-bytes rule with `\x` escapes parses into the expected offset
    // and raw byte sequence.
    #[test]
    fn test_mb_parser() {
        let id = Rule::parse(r"0:\x89PNG\x0D\x0A\x1A\x0A").unwrap();
        let Rule::MagicBytes(mb) = id else { panic!() };
        assert_eq!(mb.pos, 0);
        assert_eq!(mb.bytes, b"\x89PNG\x0D\x0A\x1A\x0A");
    }

    // Matching succeeds only when the bytes at the given offset agree, and
    // several comma-separated rules all have to hold.
    #[test]
    fn test_matches() {
        let identifier = Identifier::parse(r"0:\x89PNG\x0D\x0A\x1A\x0A").unwrap();
        assert!(identifier.matches(None, b"\x89PNG\x0D\x0A\x1A\x0Arandomecontent"));
        // Third byte differs (`M` instead of `N`), so the rule must not match.
        assert!(!identifier.matches(None, b"\x89PMG\x0D\x0A\x1A\x0Arandomecontent"));

        // Two rules: "ab" at offset 0 and "de" at offset 3.
        let identifier = Identifier::parse(r"0:ab,3:de").unwrap();
        assert!(identifier.matches(None, b"abCde"));
    }
}