// hhh 1.0.1
//
// The hhh Binary File Processor
// Documentation
// hhh
// Copyright (c) 2023 by Stacy Prowell.  All rights reserved.
// https://gitlab.com/sprowell/hhh

//! Deal with directives.
//!
//! This module provides the following capabilities.
//!
//! - Parse directives from a stream or a string
//! - Execute a directive that has been parsed

use std::cmp::Ordering;

use crate::directive_def::Arg;
use crate::directive_def::Directive;
use crate::expression::parse_expression_ws;
use crate::options::HhhArgs;
use trivet::errors::syntax_error;
use trivet::errors::ParseResult;
use trivet::parse_from_string;
use trivet::Parser;

/// Convert a number into a byte sequence.  This restricts the number to an i64 value.
///
/// The number is encoded using the requested byte order (`little_endian` selects
/// least-significant-byte-first; otherwise most-significant-byte-first) and returned
/// as a vector of bytes.  The length of the result is determined as follows.
///
///   * `width` is `None`: All eight bytes of the `i64` are returned.
///   * `width` is `Some(n)` with `n < 8`: Only the `n` least significant bytes are
///     kept (so `Some(0)` yields an empty vector).
///   * `width` is `Some(8)`: All eight bytes are returned.
///   * `width` is `Some(n)` with `n > 8`: Zero bytes are added in the most significant
///     positions until the result is `n` bytes long.
///
/// Note that padding always uses zero bytes; negative numbers are *not* sign-extended
/// when `width` exceeds eight.
pub fn number_to_bytes(number: i64, width: Option<usize>, little_endian: bool) -> Vec<u8> {
    // The natural eight-byte encoding in the requested byte order.
    let native = if little_endian {
        number.to_le_bytes()
    } else {
        number.to_be_bytes()
    };
    let length = native.len();

    // Without an explicit width the full encoding is the answer.
    let width = match width {
        Some(width) => width,
        None => return native.to_vec(),
    };

    match width.cmp(&length) {
        Ordering::Equal => native.to_vec(),
        Ordering::Less => {
            // Truncate, keeping the least significant bytes.  Those are at the front
            // for little endian and at the back for big endian.
            if little_endian {
                native[..width].to_vec()
            } else {
                native[length - width..].to_vec()
            }
        }
        Ordering::Greater => {
            // Pad with zeros in the most significant positions.  Build the result in
            // a single allocation rather than repeatedly inserting at the front.
            let mut bytes = Vec::with_capacity(width);
            if little_endian {
                bytes.extend_from_slice(&native);
                bytes.resize(width, 0u8);
            } else {
                bytes.resize(width - length, 0u8);
                bytes.extend_from_slice(&native);
            }
            bytes
        }
    }
}

/// Parse a directive.
///
/// On entry the parser is assumed to point to the first character of the directive.
/// Trailing whitespace is consumed.
///
/// A directive consists of the directive name, which must be a valid identifier
/// starting with a letter or underscore, and then followed by any number of
/// letters, digits, and underscores.
///
/// ```text
/// identifier -> [a-zA-Z_] [a-zA-Z0-9_]*
/// ```
///
/// An identifier may have arguments associated with it.  The argument list is
/// given in a set of parentheses, and there can be no whitespace between the
/// directive name and the opening parenthesis.  Directive arguments are
/// separated by commas.  The argument list can be empty.
///
/// ```text
/// arguments -> '(' (arg (',' arg)*)? ')'
/// ```
///
/// An argument can be a string or an expression.  Strings are double-quoted and
/// can contain escapes.  An expression uses the same syntax as in an input hexadecimal
/// stream and represents a single integer value.
///
/// Expressions in the argument list do not have to be enclosed in square brackets, but
/// they (optionally) can be.  An advantage of writing the expression in square brackets
/// is that you can convert it to a byte stream by suffixing it with the width indicator.
///
/// ```text
/// arg -> string | expression | '[' expression ']' ('/' number)
/// ```
///
/// A complete directive is then as follows.
///
/// ```text
/// directive = identifier (arguments)?
/// ```
pub fn parse_directive(parser: &mut Parser, config: &mut HhhArgs) -> ParseResult<Directive> {
    // Get the directive name.
    let mut name = parser.take_while(|ch| ch.is_alphabetic() || ch == '_' || ch == '-');
    name += &parser.take_while(|ch| ch.is_alphanumeric() || ch == '_' || ch == '-');
    parser.consume_ws();

    // Convert hyphens in the name into underscores.
    name = name.replace('-', "_");

    // Check for an argument list.
    if !parser.peek_and_consume('(') {
        parser.consume_ws();
        // No argument list.
        return Ok(Directive {
            name,
            ..Default::default()
        });
    }
    parser.consume_ws();

    // Parse the argument list.  We need to have a comma between arguments.
    let mut arguments = vec![];
    let mut first = true;
    while !parser.peek_and_consume(')') {
        // Require commas to separate arguments.
        if first {
            first = false;
        } else if parser.peek_and_consume(',') {
            parser.consume_ws();
        } else {
            return Err(syntax_error(
                parser.loc(),
                "Arguments to a directive must be comma-separated.",
            ));
        }

        // Parse a single argument.
        match parser.peek() {
            '"' => {
                // Parse a string.
                let value = parser.parse_string_match_delimiter_ws()?;
                arguments.push(Arg::String(value));
            }

            '[' => {
                // This is an expression.  Consume the bracket and then parse the
                // expression.  Then consume the closing bracket and determine what
                // to do.
                parser.consume();
                parser.consume_ws();
                let number = parse_expression_ws(parser, config)?;
                if !parser.peek_and_consume(']') {
                    return Err(syntax_error(
                        parser.loc(),
                        "Missing closing square bracket for expression.",
                    ));
                }

                // Check for a slash.  If present, it will tell us the width of the
                // number in bytes.  There should be no whitespace between the expression and
                // the slash.
                if parser.peek_and_consume('/') {
                    // Found the slash.  The next thing must be a number, so parse it.  There should
                    // be no whitespace.
                    let digits = parser.take_while_unless(|ch| ch.is_ascii_digit(), |ch| ch == '_');
                    let loc = parser.loc();
                    let width = match digits.parse::<u8>() {
                        Ok(value) => value,
                        Err(error) => {
                            return Err(syntax_error(loc, &error.to_string()));
                        }
                    } as usize;

                    // Look for a byte ordering override.
                    let next = parser.peek_n(2).to_uppercase();
                    let little_endian = match next.as_str() {
                        "LE" => {
                            parser.consume_n(2);
                            true
                        }
                        "BE" => {
                            parser.consume_n(2);
                            false
                        }
                        _ => config.little_endian,
                    };

                    // Convert the number into a byte sequence.
                    let bytes = number_to_bytes(number, Some(width), little_endian);
                    arguments.push(Arg::Bytes(bytes));
                } else {
                    // There is no width specifier.
                    arguments.push(Arg::Number(number));
                }
            }

            ',' => {
                // Missing argument.
                parser.consume();
                return Err(syntax_error(
                    parser.loc(),
                    "Expected an argument but did not find one.",
                ));
            }

            _ => {
                // Assume this is an expression.
                let value = parse_expression_ws(parser, config)?;
                arguments.push(Arg::Number(value));
            }
        }
        parser.consume_ws();
    }

    // Done.
    if arguments.is_empty() {
        Ok(Directive {
            name,
            ..Default::default()
        })
    } else {
        Ok(Directive { name, arguments })
    }
}

/// Parse a single directive from `text` and, if parsing succeeds, execute it.
///
/// A parse failure is returned as the `Err` case.  Otherwise the outcome of executing
/// the directive against `config` is returned: `None` on success, or `Some(message)`
/// carrying an error message.
pub fn parse_and_do_directive(text: &str, config: &mut HhhArgs) -> ParseResult<Option<String>> {
    let mut source = parse_from_string(text);
    parse_directive(&mut source, config).map(|parsed| parsed.execute(config))
}

#[cfg(test)]
mod test {
    use trivet::{parse_from_string, parser::ParseResult};

    use crate::{
        directive_def::{Arg, Directive},
        options::HhhArgs,
    };

    use super::{number_to_bytes, parse_directive};

    /// Parse `text` as a directive using the given configuration.
    fn parse(text: &str, config: &mut HhhArgs) -> ParseResult<Directive> {
        let mut parser = parse_from_string(text);
        parse_directive(&mut parser, config)
    }

    /// Check `number_to_bytes` against a table of inputs and expected encodings.
    #[test]
    fn number_to_bytes_test() {
        // Each case is (number, width, little_endian, expected bytes).
        let cases: Vec<(i64, Option<usize>, bool, Vec<u8>)> = vec![
            // Narrow widths.
            (0, Some(1), false, vec![0u8]),
            (0, Some(0), false, vec![]),
            (-1, Some(1), false, vec![0xff]),
            // Exact and wide widths.
            (0, Some(8), false, vec![0u8; 8]),
            (0, Some(36), false, vec![0u8; 36]),
            (0, Some(36), true, vec![0u8; 36]),
            (-1, Some(8), false, vec![0xff; 8]),
            // No width given.
            (0, None, false, vec![0u8; 8]),
            (-1, None, false, vec![0xff; 8]),
            // Big endian.
            (
                0x218e4436,
                Some(6),
                false,
                vec![0x00, 0x00, 0x21, 0x8e, 0x44, 0x36],
            ),
            (
                0x218e4436,
                None,
                false,
                vec![0x00, 0x00, 0x00, 0x00, 0x21, 0x8e, 0x44, 0x36],
            ),
            // Little endian.
            (
                0x218e4436,
                Some(6),
                true,
                vec![0x36, 0x44, 0x8e, 0x21, 0x00, 0x00],
            ),
            (
                0x218e4436,
                None,
                true,
                vec![0x36, 0x44, 0x8e, 0x21, 0x00, 0x00, 0x00, 0x00],
            ),
            // Negative values, little endian: invert every byte and add one.
            (
                -0x218e4436,
                Some(6),
                true,
                vec![
                    0xff - 0x36 + 1,
                    0xff - 0x44,
                    0xff - 0x8e,
                    0xff - 0x21,
                    0xff - 0x00,
                    0xff - 0x00,
                ],
            ),
            (
                -0x218e4436,
                None,
                true,
                vec![
                    0xff - 0x36 + 1,
                    0xff - 0x44,
                    0xff - 0x8e,
                    0xff - 0x21,
                    0xff - 0x00,
                    0xff - 0x00,
                    0xff - 0x00,
                    0xff - 0x00,
                ],
            ),
        ];

        for (number, width, little_endian, expected) in cases {
            assert_eq!(number_to_bytes(number, width, little_endian), expected);
        }
    }

    /// Check well-formed directives parse as expected and malformed ones are rejected.
    #[test]
    fn parse_directive_test() -> ParseResult<()> {
        let mut config = HhhArgs::default();

        // A bare name with no argument list.
        assert_eq!(
            parse("simple", &mut config)?,
            Directive {
                name: "simple".to_string(),
                arguments: vec![],
            }
        );

        // Numbers, bracketed expressions, a width suffix, a variable, and a string.
        config.set_variable("dig", &[0x7f, 0xfe, 0xee, 0x0c, 0xde, 0xad, 0xbe, 0xef]);
        assert_eq!(
            parse("f(1,2,[3/3],[52]/2,$dig,\"dig\")", &mut config)?,
            Directive {
                name: "f".to_string(),
                arguments: vec![
                    Arg::Number(1),
                    Arg::Number(2),
                    Arg::Number(1),
                    Arg::Bytes(vec![0, 52]),
                    Arg::Number(0x7ffeee0cdeadbeef),
                    Arg::String("dig".to_string())
                ],
            }
        );

        // Interior whitespace and explicit byte orders.
        assert_eq!(
            parse(
                "f( [ 6 * 2 ]/3le , [ 2 * 6 ]/4be , ( ( 2 * 3 ) ** 2 ) >> 1 )",
                &mut config
            )?,
            Directive {
                name: "f".to_string(),
                arguments: vec![
                    Arg::Bytes(vec![12, 0, 0]),
                    Arg::Bytes(vec![0, 0, 0, 12]),
                    Arg::Number(18)
                ],
            }
        );

        // An empty argument list.
        assert_eq!(
            parse("f()", &mut config)?,
            Directive {
                name: "f".to_string(),
                arguments: vec![],
            }
        );

        // Every one of these must be rejected.
        let malformed = [
            "f(1",
            "f(1,",
            "f(1,\"",
            "f(1,$",
            "f(1,$moo, [2*6)",
            "f(1,$moo,,)",
            "f([2]/256)",
            "f([2",
            "f(,",
        ];
        for text in malformed.iter() {
            assert!(parse(text, &mut config).is_err());
        }
        Ok(())
    }
}