// svmap 0.3.0
//
// A library to parse SVMap, used to map memory for emulators.
// Documentation
use std::fs;
use crate::AddressType;
use crate::memory_region::*;
use crate::address_vector::AddressVector;
use lazy_static::lazy_static;
use regex::Regex;
use crate::error::Error;
use crate::error::Error::*;

/// Contains the result of compiled SVMap.
#[derive(PartialEq, Eq)]
#[derive(Debug)]
pub struct MemoryLayout<M: Clone, V: Clone> {
    regions: Vec<MemoryRegion<M>>,
    vectors: Vec<AddressVector<V>>
}


macro_rules! is_valid_number_regex {
    () => {
        r"(?:(?:(?:(?:0[Xx])|#|\$)(?P<hex>[0-9A-Fa-f](?:[0-9A-Fa-f_]*[0-9A-Fa-f])*))|(?:(?:(?:0[Bb])|%)(?P<bin>[01](?:[01_]*[01])*))|(?:(?:0[oO])(?P<oct>[0-7](?:[0-7_]*[0-7])*))|(?P<dec>[0-9](?:[0-9_]*[0-9])*))"
    };
    ($id:expr) => {
        concat!(r"(?:(?:(?:(?:0[Xx])|#|\$)(?P<hex", $id, r">[0-9A-Fa-f](?:[0-9A-Fa-f_]*[0-9A-Fa-f])*))|(?:(?:(?:0[Bb])|%)(?P<bin", $id, r">[01](?:[01_]*[01])*))|(?:(?:0[oO])(?P<oct", $id, r">[0-7](?:[0-7_]*[0-7])*))|(?P<dec", $id, r">[0-9](?:[0-9_]*[0-9])*))")
    };
}

macro_rules! from_number_capture {
    ($index:expr, $caps:expr, $bound:ident) => {
        for base in ["hex", "dec", "oct", "bin"].iter() {
            if let Some(cap) = $caps.name(base) {
                $bound = match base {
                    &"hex" => Self::interpret_hex(cap.as_str(), $index)?,
                    &"dec" => Self::interpret_dec(cap.as_str(), $index)?,
                    &"oct" => Self::interpret_oct(cap.as_str(), $index)?,
                    &"bin" => Self::interpret_bin(cap.as_str(), $index)?,
                    &_ => { panic!("this shouldn't happen..."); }
                }
            }
        }
    };
    ($index:expr, $caps:expr, $bound:ident, $id:expr) => {
        for base in [concat!("hex", $id), concat!("dec", $id), concat!("oct", $id), concat!("bin", $id)].iter() {
            if let Some(cap) = $caps.name(base) {
                $bound = match base {
                    &concat!("hex", $id) => Self::interpret_hex(cap.as_str(), $index)?,
                    &concat!("dec", $id) => Self::interpret_dec(cap.as_str(), $index)?,
                    &concat!("oct", $id) => Self::interpret_oct(cap.as_str(), $index)?,
                    &concat!("bin", $id) => Self::interpret_bin(cap.as_str(), $index)?,
                    &_ => { panic!("this shouldn't happen..."); }
                }
            }
        }
    }
}


impl<M: Clone, V: Clone> MemoryLayout<M, V> {

    fn new() -> Self {
        Self { regions: Vec::new(), vectors: Vec::new() }
    }

    /// Returns a `MemoryLayout` struct from a given filepath.
    ///
    /// The `region_interpreter` and the `vector_interpreter` convert a `&str` to the corresponding
    /// (user defined) description.
    pub fn from_file(path: &str,
                     region_interpreter: fn(&str) -> Result<M, &str>,
                     vector_interpreter: fn(&str) -> Result<V, &str>
    ) -> Result<Self, Error> {

        let file = fs::read_to_string(path);
        let file = match file {
            Ok(file) => { file }
            Err(_) => { return Err(FileNotFound(path.into())) }
        };

        Self::from_lines(file, region_interpreter, vector_interpreter)
    }

    /// Returns a `MemoryLayout` struct from a given `String`.
    ///
    /// The `region_interpreter` and the `vector_interpreter` convert a `&str` to the corresponding
    /// (user defined) description.
    pub fn from_lines(lines: String,
                       region_interpreter: fn(&str) -> Result<M, &str>,
                       vector_interpreter: fn(&str) -> Result<V, &str>
    ) -> Result<Self, Error> {

        let mut ml = Self::new();

        for (mut index, line) in lines.lines().enumerate() {
            index += 1;
            ml.interpret_line(line, index, region_interpreter, vector_interpreter)?;
        }

        Ok(ml)
    }

    fn interpret_line(&mut self, line: &str, index: usize,
                      region_interpreter: fn(&str) -> Result<M, &str>,
                      vector_interpreter: fn(&str) -> Result<V, &str>
    ) -> Result<(), Error> {

        lazy_static! {

            static ref INVALID_CHARACTER: Regex =
            Regex::new(r"[^\t[[:print:]]]").unwrap();

            static ref EMPTY_ROW: Regex =
            Regex::new(r"^[[:blank:]]*(?:\(.*\))?[[:blank:]]*$").unwrap();

            static ref REGION_ROW: Regex =
            Regex::new(concat!(
                r"^[[:blank:]]*",
                is_valid_number_regex!(0),
                r"[[:blank:]]*\.\.\.[[:blank:]]*",
                is_valid_number_regex!(1),
                r"[[:blank:]]*->[[:blank:]]*(?P<description>[[:word:]]+)[[:blank:]]*(?:\(.*\))?[[:blank:]]*$"
            )).unwrap();

            static ref VECTOR_ROW: Regex =
            Regex::new(concat!(
                r"^[[:blank:]]*@(?P<description>[[:word:]]+)[[:blank:]]*=[[:blank:]]*",
                is_valid_number_regex!(),
                r"[[:blank:]]*(\(.*\))?[[:blank:]]*$"
            )).unwrap();
        }

        if let Some(mat) = INVALID_CHARACTER.find(line) {
            return Err(Syntax(format!("invalid character at position {}", mat.start()), index))
        }

        if EMPTY_ROW.is_match(line) {
            return Ok(())
        }

        if let Some(caps) = REGION_ROW.captures(line) {
            // It is a design choice that vectors shouldn't be followed by another region assignment.
            if !self.vectors.is_empty() {
                return Err(Syntax("region assigned after a vector was assigned".into(), index))
            }

            // Using default values instead of option because the regex filters all the edge cases.
            let mut lower_bound = 0;
            let mut upper_bound = 0;

            from_number_capture!(index, caps, lower_bound, "0");
            from_number_capture!(index, caps, upper_bound, "1");

            let description = region_interpreter(caps.name("description").unwrap().into());

            let description = match description {
                Ok(desc) => { desc }
                Err(_) => { return Err(InvalidIdentifier(caps.name("description").unwrap().as_str().to_string(), index)) }
            };

            let mr = MemoryRegion::new(lower_bound, upper_bound, description);

             match mr {
                Ok(mr) => { self.push_region(mr, index)? }
                Err(e) => { return Err(Syntax(e, index)) }
            }
            return Ok(())
        }

        if let Some(caps) = VECTOR_ROW.captures(line) {
            // It is a design choice that vectors should be preceded by (all) region assignments.
            if self.regions.is_empty() {
                return Err(Syntax("vector assigned before any region was assigned".into(), index))
            }

            let description = vector_interpreter(caps.name("description").unwrap().into());

            let description = match description {
                Ok(desc) => { desc }
                Err(_) => { return Err(InvalidIdentifier(caps.name("description").unwrap().as_str().to_string(), index)) }
            };

            // Using default values instead of option because the regex filters all the edge cases.
            let mut address = 0;

            from_number_capture!(index, caps, address);

            if self.address_is_assigned(address) {
                self.vectors.push(AddressVector::new(address,  description));
            } else {
                return Err(Syntax("vector points to unassigned memory".into(), index))
            }
            return Ok(())
        }

        Err(Syntax("syntax error".into(), index))
    }

    fn interpret_hex(number: &str, index: usize) -> Result<AddressType, Error> {
        match AddressType::from_str_radix(number.replace('_', "").as_str(), 16) {
            Ok(num) => { Ok(num) }
            Err(_) => { Err(IntegerTooBig(index)) }
        }
    }

    fn interpret_dec(number: &str, index: usize) -> Result<AddressType, Error> {
        match AddressType::from_str_radix(number.replace('_', "").as_str(), 10) {
            Ok(num) => { Ok(num) }
            Err(_) => { Err(IntegerTooBig(index)) }
        }
    }

    fn interpret_oct(number: &str, index: usize) -> Result<AddressType, Error> {
        match AddressType::from_str_radix(number.replace('_', "").as_str(), 8) {
            Ok(num) => { Ok(num) }
            Err(_) => { Err(IntegerTooBig(index)) }
        }
    }

    fn interpret_bin(number: &str, index: usize) -> Result<AddressType, Error> {
        match AddressType::from_str_radix(number.replace('_', "").as_str(), 2) {
            Ok(num) => { Ok(num) }
            Err(_) => { Err(IntegerTooBig(index)) }
        }
    }

    fn push_region(&mut self, memory_region: MemoryRegion<M>, index: usize) -> Result<(), Error> {
        let last_memory_region = self.regions.last();
        let allow_appending = match last_memory_region {
            None => true,
            Some(last_mr) => memory_region.comes_after(last_mr)
        };

        if !allow_appending {
            return Err(Syntax("couldn't push memory region because it doesn't come after the last memory region".into(), index));
        }
        self.regions.push(memory_region);
        Ok(())
    }

    /// Returns all the unassigned memory regions between `0` and the last `MemoryRegion`s lower
    /// bound with the user defined description.
    pub fn get_unassigned_regions(&self, description: M) -> Vec<MemoryRegion<M>> {
        let mut unassigned_regions = Vec::new();
        let mut last_region;

        let region = match self.regions.first() {
            None => return unassigned_regions,
            Some(region) => region,
        };

        if !region.starts_at_zero() {
            unassigned_regions.push(region.region_between_zero_and_self(&description).unwrap().unwrap())
        }
        last_region = region;

        for region in &self.regions {
            if !last_region.is_adjacent_to(region) {
                unassigned_regions.push(last_region.region_between(region, &description).unwrap().unwrap())
            }
            last_region = region;
        }

        unassigned_regions
    }

    /// Returns `true` if a address is inside of a `MemoryRegion` (and thus is assigned).
    pub fn address_is_assigned(&self, address: AddressType) -> bool {
        for region in &self.regions {
            if region.address_in_region(address) {
                return true;
            }
            if !region.address_comes_after(address) {
                return false;
            }
        }
        false
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn line_interpretation() {
        fn region_interpreter(identifier: &str) -> Result<u8, &str> {
            match identifier {
                "t" => Ok(0),
                _ => Err(identifier)
            }
        }
        fn vector_interpreter(identifier: &str) -> Result<u8, &str> {
            match identifier {
                "a" => Ok(0),
                _ => Err(identifier)
            }
        }

        let ml = MemoryLayout::from_lines(
            "    ( comment  \t   )\t\n
             %0101...0o23->t\n
             @a=#A(:))".to_string(),
            region_interpreter,
            vector_interpreter
        );

        assert!(
            ml.is_ok()
        );

        let ml = ml.unwrap();

        assert_eq!(ml.regions[0], MemoryRegion::new(0b0101, 0o23, 0u8).unwrap());
        assert_eq!(ml.vectors[0], AddressVector::new(0xA, 0u8));


        assert!(
            MemoryLayout::from_lines(
            "    ( comment  \t   )\t\n
            @a=#A(:))\n
             %0101...0o23->t".to_string(),
            region_interpreter,
            vector_interpreter
            ).is_err()
        );


        assert!(
            MemoryLayout::from_lines(
            "    ( comment  \t   )\t\n
             %0101...0o23->t\n
             @a 5=#A(:))".to_string(),
            region_interpreter,
            vector_interpreter
            ).is_err()
        );
    }

    #[test]
    fn doc_test() {
        #[derive(Clone)]
        enum Region {
            Ram,
            Rom
        }

        #[derive(Clone)]
        enum Vector {
            ResetVector
        }

        fn parse_region(identifier: &str) -> Result<Region, &str> {
            match identifier {
                "ram" => Ok(Region::Ram),
                "rom" => Ok(Region::Rom),
                _ => Err(identifier)
            }
        }

        fn parse_vector(identifier: &str) -> Result<Vector, &str> {
            match identifier {
                "reset_vector" => Ok(Vector::ResetVector),
                _ => Err(identifier)
            }
        }

        // MemoryLayout::from_file( ... ) is also possible.
        let ml = MemoryLayout::from_lines(
            "(Set up memory)\n
            0x0000 ... 0x7FFF -> ram\n
            0x8000 ... 0xFFFF -> rom\n

            (Set up vectors)\n
            @reset_vector = 0xFFFD".into(),
            parse_region,
            parse_vector
        ).unwrap();
    }
}