a2fuse 0.2.0 - Docs.rs

use crate::error::{A2FuseError, Result};
use tracing::debug;

const APPLESOFT_LOAD_ADDRESS: u16 = 0x0801;

const TOKENS: &[(u8, &str)] = &[
    (0xea, "MID$"),
    (0xe9, "RIGHT$"),
    (0xe8, "LEFT$"),
    (0xe7, "CHR$"),
    (0xe4, "STR$"),
    (0xd7, "SCRN("),
    (0xc3, "SPC("),
    (0xc0, "TAB("),
    (0xa4, "LOMEM:"),
    (0xa3, "HIMEM:"),
    (0xa2, "VTAB"),
    (0xa0, "COLOR="),
    (0x99, "SCALE="),
    (0x98, "ROT="),
    (0x96, "HTAB"),
    (0x92, "HCOLOR="),
    (0x8f, "VLIN"),
    (0x8e, "HLIN"),
    (0x8d, "PLOT"),
    (0x8b, "IN#"),
    (0x8a, "PR#"),
    (0x90, "HGR2"),
    (0xdc, "LOG"),
    (0xdd, "EXP"),
    (0xde, "COS"),
    (0xdf, "SIN"),
    (0xe0, "TAN"),
    (0xe1, "ATN"),
    (0xda, "SQR"),
    (0xdb, "RND"),
    (0xd9, "POS"),
    (0xd8, "PDL"),
    (0xd6, "FRE"),
    (0xd5, "USR"),
    (0xd4, "ABS"),
    (0xd3, "INT"),
    (0xd2, "SGN"),
    (0xe6, "ASC"),
    (0xe5, "VAL"),
    (0xe3, "LEN"),
    (0xe2, "PEEK"),
    (0xd1, "<"),
    (0xd0, "="),
    (0xcf, ">"),
    (0xce, "OR"),
    (0xcd, "AND"),
    (0xcc, "^"),
    (0xcb, "/"),
    (0xca, "*"),
    (0xc9, "-"),
    (0xc8, "+"),
    (0xc7, "STEP"),
    (0xc6, "NOT"),
    (0xc5, "AT"),
    (0xc4, "THEN"),
    (0xc2, "FN"),
    (0xc1, "TO"),
    (0xbf, "NEW"),
    (0xbe, "GET"),
    (0xbd, "CLEAR"),
    (0xbc, "LIST"),
    (0xbb, "CONT"),
    (0xba, "PRINT"),
    (0xba, "?"),
    (0xb9, "POKE"),
    (0xb8, "DEF"),
    (0xb7, "SAVE"),
    (0xb6, "LOAD"),
    (0xb5, "WAIT"),
    (0xb4, "ON"),
    (0xb3, "STOP"),
    (0xb2, "REM"),
    (0xb1, "RETURN"),
    (0xb0, "GOSUB"),
    (0xaf, "&"),
    (0xae, "RESTORE"),
    (0xad, "IF"),
    (0xac, "RUN"),
    (0xab, "GOTO"),
    (0xaa, "LET"),
    (0xa9, "SPEED="),
    (0xa8, "STORE"),
    (0xa7, "RECALL"),
    (0xa6, "RESUME"),
    (0xa5, "ONERR"),
    (0xa1, "POP"),
    (0x9f, "FLASH"),
    (0x9e, "INVERSE"),
    (0x9d, "NORMAL"),
    (0x9c, "NOTRACE"),
    (0x9b, "TRACE"),
    (0x9a, "SHLOAD"),
    (0x97, "HOME"),
    (0x95, "XDRAW"),
    (0x94, "DRAW"),
    (0x93, "HPLOT"),
    (0x91, "HGR"),
    (0x89, "TEXT"),
    (0x88, "GR"),
    (0x87, "READ"),
    (0x86, "DIM"),
    (0x85, "DEL"),
    (0x84, "INPUT"),
    (0x83, "DATA"),
    (0x82, "NEXT"),
    (0x81, "FOR"),
    (0x80, "END"),
    (0x8c, "CALL"),
];

const DETOKENS: [&str; 0x6b] = [
    "END", "FOR", "NEXT", "DATA", "INPUT", "DEL", "DIM", "READ", "GR", "TEXT", "PR#", "IN#",
    "CALL", "PLOT", "HLIN", "VLIN", "HGR2", "HGR", "HCOLOR=", "HPLOT", "DRAW", "XDRAW", "HTAB",
    "HOME", "ROT=", "SCALE=", "SHLOAD", "TRACE", "NOTRACE", "NORMAL", "INVERSE", "FLASH", "COLOR=",
    "POP", "VTAB", "HIMEM:", "LOMEM:", "ONERR", "RESUME", "RECALL", "STORE", "SPEED=", "LET",
    "GOTO", "RUN", "IF", "RESTORE", "&", "GOSUB", "RETURN", "REM", "STOP", "ON", "WAIT", "LOAD",
    "SAVE", "DEF", "POKE", "PRINT", "CONT", "LIST", "CLEAR", "GET", "NEW", "TAB(", "TO", "FN",
    "SPC(", "THEN", "AT", "NOT", "STEP", "+", "-", "*", "/", "^", "AND", "OR", ">", "=", "<",
    "SGN", "INT", "ABS", "USR", "FRE", "SCRN(", "PDL", "POS", "SQR", "RND", "LOG", "EXP", "COS",
    "SIN", "TAN", "ATN", "PEEK", "LEN", "STR$", "VAL", "ASC", "CHR$", "LEFT$", "RIGHT$", "MID$",
];

pub fn tokenize_program(text: &str) -> Result<Vec<u8>> {
    debug!("tokenizing AppleSoft BASIC text");
    let mut parsed = Vec::new();
    for raw_line in text.lines() {
        let line = raw_line.trim_end();
        if line.is_empty() {
            continue;
        }
        let (number, body) = parse_line_number(line)?;
        debug!(
            line_number = number,
            text = body,
            "parsed BASIC source line"
        );
        parsed.push((number, tokenize_line(body)?));
    }
    if parsed.is_empty() {
        return Err(A2FuseError::InvalidApplesoft(
            "expected at least one BASIC line".to_owned(),
        ));
    }

    let mut output = Vec::new();
    let mut address = APPLESOFT_LOAD_ADDRESS;
    for (index, (number, body)) in parsed.iter().enumerate() {
        let line_size = 5_usize
            .checked_add(body.len())
            .ok_or_else(|| A2FuseError::InvalidApplesoft("line is too long".to_owned()))?;
        let line_delta = u16::try_from(line_size)
            .map_err(|_| A2FuseError::InvalidApplesoft("line address overflow".to_owned()))?;
        let next_address = address
            .checked_add(line_delta)
            .ok_or_else(|| A2FuseError::InvalidApplesoft("line address overflow".to_owned()))?;
        debug!(
            line_number = *number,
            current_address = address,
            next_address,
            token_bytes = body.len(),
            "encoding BASIC line"
        );
        output.extend_from_slice(&next_address.to_le_bytes());
        output.extend_from_slice(&number.to_le_bytes());
        output.extend_from_slice(body);
        output.push(0);
        address = next_address;
        if index + 1 == parsed.len() {
            output.extend_from_slice(&0_u16.to_le_bytes());
        }
    }
    debug!(
        bytes = output.len(),
        "finished tokenizing AppleSoft BASIC program"
    );
    Ok(output)
}

pub fn detokenize_program(bytes: &[u8]) -> Result<String> {
    debug!(bytes = bytes.len(), "detokenizing AppleSoft BASIC bytes");
    let mut output = String::new();
    let mut offset = 0_usize;
    let mut first = true;

    while offset < bytes.len() {
        if bytes.len() - offset >= 2 && bytes[offset] == 0 && bytes[offset + 1] == 0 {
            if offset + 2 != bytes.len() {
                return Err(A2FuseError::InvalidApplesoft(
                    "bytes remain after BASIC terminator".to_owned(),
                ));
            }
            debug!(offset, "found BASIC end-of-program marker");
            break;
        }
        if bytes.len() - offset < 4 {
            return Err(A2FuseError::InvalidApplesoft(format!(
                "truncated line header at byte {offset}"
            )));
        }
        let next_pointer = u16::from_le_bytes([bytes[offset], bytes[offset + 1]]);
        let line_number = u16::from_le_bytes([bytes[offset + 2], bytes[offset + 3]]);
        debug!(
            offset,
            next_pointer, line_number, "decoding BASIC line header"
        );
        offset += 4;
        let mut line = Vec::new();
        while offset < bytes.len() && bytes[offset] != 0 {
            line.push(bytes[offset]);
            offset += 1;
        }
        if offset >= bytes.len() {
            return Err(A2FuseError::InvalidApplesoft(format!(
                "line {line_number} is missing a terminator"
            )));
        }
        offset += 1;

        if !first {
            output.push('\n');
        }
        first = false;
        output.push_str(&line_number.to_string());
        output.push(' ');
        output.push_str(&detokenize_line(&line)?);
        debug!(
            line_number,
            next_pointer,
            line_bytes = line.len(),
            next_offset = offset,
            "decoded BASIC line"
        );

        if next_pointer == 0 {
            debug!(line_number, "line header points to end-of-program");
            break;
        }
    }

    if output.is_empty() {
        return Err(A2FuseError::InvalidApplesoft(
            "expected at least one BASIC line".to_owned(),
        ));
    }
    debug!("finished detokenizing AppleSoft BASIC program");
    Ok(output)
}

fn parse_line_number(line: &str) -> Result<(u16, &str)> {
    let digits: String = line.chars().take_while(|ch| ch.is_ascii_digit()).collect();
    if digits.is_empty() {
        return Err(A2FuseError::InvalidApplesoft(format!(
            "line is missing a line number: {line:?}"
        )));
    }
    let number = digits
        .parse::<u16>()
        .map_err(|_| A2FuseError::InvalidApplesoft(format!("invalid line number: {line:?}")))?;
    let body = line[digits.len()..].trim_start();
    Ok((number, body))
}

fn tokenize_line(line: &str) -> Result<Vec<u8>> {
    let bytes = line.as_bytes();
    let mut output = Vec::with_capacity(bytes.len());
    let mut index = 0_usize;
    let mut in_string = false;
    let mut in_rem = false;

    while index < bytes.len() {
        let byte = bytes[index];
        if byte == b'"' {
            in_string = !in_string;
            output.push(byte);
            index += 1;
            continue;
        }
        if in_string || in_rem {
            output.push(byte);
            index += 1;
            continue;
        }
        if byte == b' ' {
            index += 1;
            continue;
        }
        if let Some((token, consumed, rem)) = match_token(bytes, index) {
            output.push(token);
            index += consumed;
            if rem {
                in_rem = true;
            }
            continue;
        }
        output.push(byte);
        index += 1;
    }
    Ok(output)
}

fn detokenize_line(line: &[u8]) -> Result<String> {
    let mut output = String::new();
    let mut in_rem = false;

    for (position, byte) in line.iter().enumerate() {
        if in_rem || *byte < 0x80 {
            output.push(char::from(*byte));
            continue;
        }
        let token_index = usize::from(*byte - 0x80);
        let token = DETOKENS
            .get(token_index)
            .ok_or_else(|| A2FuseError::InvalidApplesoft(format!("unknown token ${byte:02X}")))?;
        if should_insert_space_before(&output, token) {
            output.push(' ');
        }
        output.push_str(token);
        let next_byte = line.get(position + 1).copied();
        if should_insert_space_after(token, next_byte) {
            output.push(' ');
        }
        if *byte == 0xb2 {
            in_rem = true;
        }
    }
    Ok(output)
}

fn match_token(bytes: &[u8], index: usize) -> Option<(u8, usize, bool)> {
    for (token, keyword) in TOKENS {
        let keyword_bytes = keyword.as_bytes();
        let end = index + keyword_bytes.len();
        if end > bytes.len() {
            continue;
        }
        if !eq_ignore_ascii_case(&bytes[index..end], keyword_bytes) {
            continue;
        }
        return Some((*token, keyword_bytes.len(), *token == 0xb2));
    }
    None
}

fn eq_ignore_ascii_case(left: &[u8], right: &[u8]) -> bool {
    left.len() == right.len()
        && left
            .iter()
            .zip(right)
            .all(|(a, b)| a.eq_ignore_ascii_case(b))
}

fn should_insert_space_before(current: &str, token: &str) -> bool {
    if !is_word_like_token_text(token) {
        return false;
    }
    matches!(
        current.chars().next_back(),
        Some(ch) if ch != ' ' && (ch.is_ascii_alphanumeric() || ch == '"' || ch == ')' || ch == '$')
    )
}

fn should_insert_space_after(token: &str, next_byte: Option<u8>) -> bool {
    if !is_word_like_token_text(token) {
        return false;
    }
    match next_byte {
        Some(b' ') => false,
        Some(byte) if byte < 0x80 => byte.is_ascii_alphanumeric() || byte == b'$' || byte == b'"',
        Some(byte) => is_word_like_token_byte(byte),
        None => false,
    }
}

fn is_word_like_token_text(token: &str) -> bool {
    !token.ends_with('(')
        && !token.ends_with(':')
        && token
            .bytes()
            .all(|byte| byte.is_ascii_alphabetic() || byte == b'$')
}

fn is_word_like_token_byte(token: u8) -> bool {
    if token < 0x80 {
        return false;
    }
    DETOKENS
        .get(usize::from(token - 0x80))
        .is_some_and(|token_text| is_word_like_token_text(token_text))
}