libreda-lefdef 0.0.2

/*
 * Copyright (c) 2021-2021 Thomas Kramer.
 *
 * This file is part of LibrEDA 
 * (see https://codeberg.org/libreda/libreda-lefdef).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

//! Functions for parsing ASCII-based formats from iterators over bytes.

use itertools::{Itertools, PeekingNext};
use std::iter::Peekable;
use std::str::FromStr;
use std::fmt;
use std::num::ParseIntError;

/// Error while parsing LEF or DEF.
/// TODO: Separate lexer errors from LEF/DEF specific errors.
#[derive(Clone, Debug)]
pub enum LefDefParseError {
    /// Encountered an invalid character.
    InvalidCharacter,
    /// Reached the end of the file before the end of the library.
    UnexpectedEndOfFile,
    /// Found a different token than required.
    /// The fields are, in this order: the expected token and the actual token.
    UnexpectedToken(String, String),
    /// Unknown token. The token is given as a string.
    UnknownToken(String),
    /// Invalid literal. The literal is given as a string.
    InvalidLiteral(String),
    /// Illegal value for the bus bit characters.
    IllegalBusBitChars(char, char),
    /// Something is not yet implemented.
    NotImplemented(&'static str),
    /// A property name was used that has not been defined in PROPERTYDEFINITIONS.
    UndefinedProperty(String),
    /// Failed to parse an integer.
    ParseIntError(ParseIntError),
    /// Some other error described by a string.
    Other(&'static str),
}

/// Human readable error messages.
impl fmt::Display for LefDefParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            LefDefParseError::InvalidCharacter => write!(f, "Invalid character."),
            LefDefParseError::UnexpectedEndOfFile => write!(f, "Unexpected end of file."),
            // The variant stores `(expected, actual)`; the message names the
            // actual token first and the expected token second.
            LefDefParseError::UnexpectedToken(expected, actual) =>
                write!(f, "Unexpected token. '{}' instead of '{}'", actual, expected),
            LefDefParseError::UnknownToken(t) => write!(f, "Unknown token: '{}'.", t),
            LefDefParseError::InvalidLiteral(n) => write!(f, "Invalid literal: '{}'.", n),
            LefDefParseError::IllegalBusBitChars(a, b) =>
                write!(f, "Illegal bus bit chars: '{} {}'.", a, b),
            LefDefParseError::NotImplemented(n) => write!(f, "Not implemented: '{}'.", n),
            LefDefParseError::UndefinedProperty(p) => write!(f, "Undefined property: '{}'.", p),
            LefDefParseError::Other(msg) => write!(f, "'{}'.", msg),
            LefDefParseError::ParseIntError(e) => write!(f, "Illegal integer: '{}'", e),
        }
    }
}

/// Wrap integer parsing errors such that they can be propagated with `?`.
impl From<ParseIntError> for LefDefParseError {
    fn from(e: ParseIntError) -> Self {
        Self::ParseIntError(e)
    }
}

// /// Check if a char is whitespace.
// fn is_whitespace(c: char) -> bool {
//     match c {
//         ' ' => true,
//         '\t' => true,
//         '\r' | '\n' => true,
//         _ => false
//     }
// }


/// Read the next token from `iter` into `buffer` and return it as a string slice.
///
/// * Tokens are separated by white space.
/// * A `#` at the position where a token would start begins a comment which
///   reaches until the end of the line. Comments are skipped.
/// * A token starting with `"` or `'` is a quoted token. It reaches until the next
///   unescaped occurrence of the same quote character and may contain white space.
///   The surrounding quotes are not stored in the buffer.
/// * A backslash escapes the character that follows it: an escaped quote does not
///   terminate a quoted token and an escaped white space does not terminate a normal
///   token. An escaped backslash (`\\`) does *not* escape the character after it.
///   Backslashes are copied to the buffer unchanged.
///
/// The character which terminates the token (white space or closing quote) is consumed.
/// If the input ends inside a quoted token, the characters read so far are returned.
///
/// `buffer` is cleared at the beginning of the call.
/// Returns `None` if the input is exhausted before a token starts.
pub(crate) fn read_token<'a, I>(iter: &mut I, buffer: &'a mut String) -> Option<&'a str>
    where I: Iterator<Item=char> + PeekingNext {
    buffer.clear();

    loop {
        // Skip whitespace in front of the token.
        iter.peeking_take_while(|c| c.is_whitespace()).count();

        // Take the first character of the token, stop at the end of the input.
        let first = iter.peeking_next(|_| true)?;
        debug_assert!(!first.is_whitespace());

        match first {
            '#' => {
                // Skip the comment. The line break itself is skipped as
                // whitespace in the next iteration.
                iter.peeking_take_while(|&c| c != '\n' && c != '\r').count();
            }
            '"' | '\'' => {
                // Quoted token: read until the matching unescaped quote.
                let quote_char = first;
                // `true` if the previous character was an unescaped backslash.
                let mut escaped = false;

                for c in iter.by_ref() {
                    if escaped {
                        escaped = false;
                    } else if c == '\\' {
                        escaped = true;
                    } else if c == quote_char {
                        // Closing quote is consumed but not stored.
                        break;
                    }
                    buffer.push(c);
                }
                return Some(buffer.as_str());
            }
            _ => {
                // Normal token: read until the next unescaped whitespace.
                buffer.push(first);
                let mut escaped = first == '\\';

                for c in iter.by_ref() {
                    if escaped {
                        escaped = false;
                    } else if c == '\\' {
                        escaped = true;
                    } else if c.is_whitespace() {
                        // Terminating whitespace is consumed but not stored.
                        break;
                    }
                    buffer.push(c);
                }
                return Some(buffer.as_str());
            }
        }
    }
}

/// Tokens must be returned in order of appearance, comments must be skipped
/// and a quoted token must be returned without its quotes.
#[test]
fn test_read_token() {
    let data = r#"
        # Comment 1

        # Comment 2

        token1

        # Comment 3

        token2 token3

        "quoted token"

        token4
    "#;

    // Echo every consumed character, this helps when debugging a failing test.
    let mut chars = data.chars()
        .inspect(|c| print!("{}", c))
        .peekable();

    let mut buffer = String::new();

    let expected_tokens = ["token1", "token2", "token3", "quoted token", "token4"];
    for expected in expected_tokens.iter() {
        let token = read_token(&mut chars, &mut buffer);
        assert!(token.is_some());
        assert_eq!(buffer, *expected);
    }

    // The input is exhausted now.
    let token = read_token(&mut chars, &mut buffer);
    assert!(token.is_none());
}

/// Provide sequential access to tokens that are created on the fly by
/// splitting characters at whitespace.
///
/// The tokenizer has a notion of a *current token*. There is no current token
/// until `advance()` has found one, and there is none anymore once the
/// input is exhausted.
pub struct Tokenized<I>
    where I: Iterator<Item=char> + PeekingNext {
    /// Source of characters.
    iter: I,
    /// Tells if `current_token` holds a valid token.
    /// Set by `advance()` to `false` when no further token could be read.
    has_current: bool,
    /// Buffer with the text of the current token. The content is only
    /// meaningful while `has_current` is `true`. The buffer is reused by
    /// `advance()` to avoid an allocation per token.
    current_token: Option<String>,
}

impl<I> Tokenized<I>
    where I: Iterator<Item=char> + PeekingNext {
    /// Advance to the next token and return it as a string slice.
    /// Returns `None` if there are no more tokens.
    pub fn next_str(&mut self) -> Option<&str> {
        self.advance();
        self.current_token_str()
    }

    /// Advance to the next token and return a copy of it.
    /// Returns `None` if there are no more tokens.
    pub fn next_string(&mut self) -> Option<String> {
        self.advance();
        self.current_token()
    }

    /// Return a copy of the current token and advance to the next token.
    ///
    /// # Errors
    /// Returns `UnexpectedEndOfFile` if there is no current token.
    /// The tokenizer is advanced in this case too.
    pub fn take(&mut self) -> Result<String, LefDefParseError> {
        let token = self.current_token();
        self.advance();
        token.ok_or(LefDefParseError::UnexpectedEndOfFile)
    }

    /// Parse the current token into a value of type `F` and advance to the next token.
    /// The tokenizer is advanced no matter if parsing succeeds or not.
    ///
    /// # Errors
    /// Returns `InvalidLiteral` with the text of the token if it cannot be parsed
    /// and `UnexpectedEndOfFile` if there is no current token.
    pub fn take_and_parse<F: FromStr>(&mut self) -> Result<F, LefDefParseError> {
        let result = match self.current_token_str() {
            Some(s) => s.parse::<F>()
                .map_err(|_| LefDefParseError::InvalidLiteral(s.to_string())),
            None => Err(LefDefParseError::UnexpectedEndOfFile),
        };

        self.advance();

        result
    }

    /// Advance to the next token.
    pub fn advance(&mut self) {
        // Reuse the buffer of the previous token.
        let mut buffer = self.current_token.take().unwrap_or_default();

        self.has_current = read_token(&mut self.iter, &mut buffer).is_some();
        self.current_token = Some(buffer);
    }

    /// Get the current token as a string slice.
    /// Returns `None` if there is no current token.
    pub fn current_token_str(&self) -> Option<&str> {
        if self.has_current {
            self.current_token.as_deref()
        } else {
            None
        }
    }

    /// Get a copy of the current token.
    /// Returns `None` if there is no current token.
    pub fn current_token(&self) -> Option<String> {
        self.current_token_str().map(|s| s.to_string())
    }

    /// Test if the current token equals to the expected token.
    /// Returns `Ok(())` if the token matches and advances the iterator.
    ///
    /// # Errors
    /// Returns `UnexpectedToken(expected, actual)` if the current token is different
    /// from `s`; the token is not consumed then.
    /// Returns `UnexpectedEndOfFile` if there is no current token.
    pub fn expect(&mut self, s: &str) -> Result<(), LefDefParseError> {
        // `current_token_str()` is `None` before the first token has been read
        // as well as after the end of the input.
        let actual = self.current_token_str()
            .ok_or(LefDefParseError::UnexpectedEndOfFile)?;

        if actual != s {
            return Err(LefDefParseError::UnexpectedToken(
                s.to_string(), actual.to_string(),
            ));
        }

        self.advance();
        Ok(())
    }

    /// Test if the current token matches with the string.
    /// The token is consumed only if it matches.
    ///
    /// # Errors
    /// Returns `UnexpectedEndOfFile` if there is no current token.
    pub fn test(&mut self, s: &str) -> Result<bool, LefDefParseError> {
        let is_match = self.peeking_test(s)?;
        if is_match {
            self.advance();
        }
        Ok(is_match)
    }

    /// Test if the current token matches with the string.
    /// The token is not consumed.
    ///
    /// # Errors
    /// Returns `UnexpectedEndOfFile` if there is no current token.
    pub fn peeking_test(&mut self, s: &str) -> Result<bool, LefDefParseError> {
        self.current_token_str()
            .map(|actual| actual == s)
            .ok_or(LefDefParseError::UnexpectedEndOfFile)
    }

    /// Consume all tokens until and including `s`.
    ///
    /// # Errors
    /// Returns `UnexpectedEndOfFile` if the input ends before `s` is found.
    pub fn skip_until(&mut self, s: &str) -> Result<(), LefDefParseError> {
        // `test()` consumes the matching token and fails at the end of the
        // input, hence the loop always terminates.
        while !self.test(s)? {
            self.advance();
        }
        Ok(())
    }
}

/// Create a tokenizer which splits a stream of characters at whitespace.
/// Comments are ignored.
///
/// The returned tokenizer has no current token yet.
pub fn tokenize<I>(iter: I) -> Tokenized<Peekable<I>>
    where I: Iterator<Item=char> {
    // The tokenizer needs one character of look-ahead.
    let chars = iter.peekable();

    Tokenized {
        current_token: None,
        has_current: false,
        iter: chars,
    }
}

/// The tokenizer must skip leading comments and yield the tokens in order.
#[test]
fn test_tokenized() {
    let data = r#"
        # Comment 1

        # Comment 2

        token1

        # Comment 3

        token2 token3

        "quoted token"

        token4
    "#;

    let mut tokens = tokenize(data.chars());

    let expected_tokens = ["token1", "token2"];
    for expected in expected_tokens.iter() {
        assert_eq!(tokens.next_str(), Some(*expected));
    }
}