hypothalamus 0.1.0

A Brainfuck AOT compiler with an LLVM IR backend
Documentation
use std::fmt;

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Op {
    Add(i32),
    Move(i64),
    Input,
    Output,
    Loop(Vec<Op>),
    Clear,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourcePosition {
    pub byte: usize,
    pub line: usize,
    pub column: usize,
}

impl SourcePosition {
    fn new(byte: usize, line: usize, column: usize) -> Self {
        Self { byte, line, column }
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxError {
    UnmatchedOpen(SourcePosition),
    UnmatchedClose(SourcePosition),
}

impl fmt::Display for SyntaxError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::UnmatchedOpen(position) => write!(
                f,
                "unmatched '[' at line {}, column {}",
                position.line, position.column
            ),
            Self::UnmatchedClose(position) => write!(
                f,
                "unmatched ']' at line {}, column {}",
                position.line, position.column
            ),
        }
    }
}

impl std::error::Error for SyntaxError {}

#[derive(Debug)]
struct Frame {
    opening: Option<SourcePosition>,
    ops: Vec<Op>,
}

pub fn parse(source: &[u8]) -> Result<Vec<Op>, SyntaxError> {
    let mut stack = vec![Frame {
        opening: None,
        ops: Vec::new(),
    }];
    let mut line = 1;
    let mut column = 1;

    for (byte_index, byte) in source.iter().copied().enumerate() {
        let position = SourcePosition::new(byte_index, line, column);

        match byte {
            b'+' => push_op(&mut stack.last_mut().expect("root frame").ops, Op::Add(1)),
            b'-' => push_op(&mut stack.last_mut().expect("root frame").ops, Op::Add(-1)),
            b'>' => push_op(&mut stack.last_mut().expect("root frame").ops, Op::Move(1)),
            b'<' => push_op(&mut stack.last_mut().expect("root frame").ops, Op::Move(-1)),
            b'.' => stack.last_mut().expect("root frame").ops.push(Op::Output),
            b',' => stack.last_mut().expect("root frame").ops.push(Op::Input),
            b'[' => stack.push(Frame {
                opening: Some(position),
                ops: Vec::new(),
            }),
            b']' => {
                if stack.len() == 1 {
                    return Err(SyntaxError::UnmatchedClose(position));
                }

                let frame = stack.pop().expect("loop frame");
                let loop_op = build_loop(frame.ops);
                push_op(&mut stack.last_mut().expect("parent frame").ops, loop_op);
            }
            _ => {}
        }

        if byte == b'\n' {
            line += 1;
            column = 1;
        } else {
            column += 1;
        }
    }

    if stack.len() != 1 {
        let frame = stack.pop().expect("unmatched loop frame");
        return Err(SyntaxError::UnmatchedOpen(
            frame.opening.expect("loop opening position"),
        ));
    }

    Ok(stack.pop().expect("root frame").ops)
}

fn build_loop(ops: Vec<Op>) -> Op {
    let optimized = optimize(ops);
    if is_clear_loop(&optimized) {
        Op::Clear
    } else {
        Op::Loop(optimized)
    }
}

fn optimize(ops: Vec<Op>) -> Vec<Op> {
    let mut output = Vec::new();

    for op in ops {
        let op = match op {
            Op::Loop(body) => build_loop(body),
            other => other,
        };
        push_op(&mut output, op);
    }

    output
}

fn push_op(output: &mut Vec<Op>, op: Op) {
    match op {
        Op::Add(delta) => {
            let delta = delta.rem_euclid(256);
            if delta == 0 {
                return;
            }

            if let Some(Op::Add(previous)) = output.last_mut() {
                *previous = (*previous + delta).rem_euclid(256);
                if *previous == 0 {
                    output.pop();
                }
                return;
            }

            output.push(Op::Add(delta));
        }
        Op::Move(delta) => {
            if delta == 0 {
                return;
            }

            if let Some(Op::Move(previous)) = output.last_mut() {
                *previous += delta;
                if *previous == 0 {
                    output.pop();
                }
                return;
            }

            output.push(Op::Move(delta));
        }
        Op::Clear => {
            if matches!(output.last(), Some(Op::Clear)) {
                return;
            }

            if matches!(output.last(), Some(Op::Add(_))) {
                output.pop();
            }

            output.push(Op::Clear);
        }
        other => output.push(other),
    }
}

fn is_clear_loop(ops: &[Op]) -> bool {
    matches!(ops, [Op::Add(1)] | [Op::Add(255)])
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn ignores_non_commands_and_combines_runs() {
        let ops = parse(b"++ ignored -- >><< .").expect("valid program");
        assert_eq!(ops, vec![Op::Output]);
    }

    #[test]
    fn builds_nested_loops() {
        let ops = parse(b"+[>+[<-]>]").expect("valid program");
        assert_eq!(
            ops,
            vec![
                Op::Add(1),
                Op::Loop(vec![
                    Op::Move(1),
                    Op::Add(1),
                    Op::Loop(vec![Op::Move(-1), Op::Add(255)]),
                    Op::Move(1)
                ])
            ]
        );
    }

    #[test]
    fn recognizes_clear_loops() {
        assert_eq!(parse(b"[-][+]").expect("valid program"), vec![Op::Clear]);
    }

    #[test]
    fn reports_unmatched_close() {
        let err = parse(b"]").expect_err("program should be invalid");
        assert_eq!(
            err,
            SyntaxError::UnmatchedClose(SourcePosition {
                byte: 0,
                line: 1,
                column: 1
            })
        );
    }

    #[test]
    fn reports_unmatched_open() {
        let err = parse(b"\n[").expect_err("program should be invalid");
        assert_eq!(
            err,
            SyntaxError::UnmatchedOpen(SourcePosition {
                byte: 1,
                line: 2,
                column: 1
            })
        );
    }
}