use std::collections::HashMap;
use tree_sitter;
use tree_sitter_integerbasic;
use crate::lang;
use crate::lang::Visit;
use super::token_maps;
use log::error;
use crate::{STDRESULT,DYNERR};
/// Converts Integer BASIC source text to its tokenized byte form and back.
///
/// The struct carries the scratch state used while walking one parsed line,
/// the program bytes accumulated so far, and the two lookup tables used
/// for translating between syntax node kinds / token text and token bytes.
pub struct Tokenizer {
    /// Source text of the line currently being tokenized.
    line: String,
    /// Tokenized bytes of every line processed so far.
    tokenized_program: Vec<u8>,
    /// Tokenized bytes of the line currently being processed.
    tokenized_line: Vec<u8>,
    /// Lookup from syntax node kind to token byte.
    tok_map: HashMap<&'static str, u8>,
    /// Lookup from token byte to token text.
    detok_map: HashMap<u8, &'static str>,
}
impl lang::Visit for Tokenizer
{
    /// Emit the tokenized bytes for the node under the cursor.
    ///
    /// The node kind selects the encoding, checked in this order:
    /// * `linenum` / `integer`: the value as a little-endian `u16`; an
    ///   `integer` is additionally prefixed by its first digit with the high bit set.
    /// * any kind found in `tok_map`: the single mapped token byte.
    /// * `str_name` / `int_name`: upper-cased, spaces removed, high bit set on
    ///   every byte except `$`, which becomes `0x40`.
    /// * `string`: upper-cased, trimmed, high bit set, with the first and last
    ///   bytes replaced by `0x28` and `0x29`.
    /// * `comment_text`: every byte with the high bit set.
    /// * any other node without named children: its text with spaces removed.
    ///
    /// Returns `GotoSibling` once a node has been emitted, `GotoChild` otherwise.
    ///
    /// # Panics
    /// Panics if a `linenum` or `integer` node does not parse as a `u16`
    /// (for example when the value exceeds 65535).
    fn visit(&mut self,curs:&tree_sitter::TreeCursor) -> lang::WalkerChoice
    {
        let node = curs.node();
        let kind = node.kind();

        if kind=="linenum" || kind=="integer" {
            let cleaned = self.text(node).replace(" ","");
            let num = match cleaned.parse::<u16>() {
                Ok(num) => num,
                Err(_) => panic!("number node did not parse as a number")
            };
            if kind=="integer" {
                // A successful parse guarantees `cleaned` is not empty.
                self.tokenized_line.push(cleaned.as_bytes()[0] | 0x80);
            }
            self.tokenized_line.extend_from_slice(&num.to_le_bytes());
            return lang::WalkerChoice::GotoSibling;
        }

        if let Some(&tok) = self.tok_map.get(kind) {
            self.tokenized_line.push(tok);
            return lang::WalkerChoice::GotoSibling;
        }

        // Text is stored with the high bit set.  `| 0x80` gives the same result
        // as `+ 128` for ASCII, but cannot overflow (debug panic) or wrap into
        // the token range (release) when the source holds bytes >= 0x80.
        if kind=="str_name" || kind=="int_name" {
            let name = self.text(node).to_uppercase().replace(" ","");
            self.tokenized_line.extend(name.bytes().map(|b| {
                if b==b'$' { 0x40 } else { b | 0x80 }
            }));
            return lang::WalkerChoice::GotoSibling;
        }

        if kind=="string" {
            let upper = self.text(node).to_uppercase();
            let mut neg: Vec<u8> = upper.trim().bytes().map(|b| b | 0x80).collect();
            // Replace the delimiters; an empty node text is left empty rather
            // than indexed out of bounds.
            if let Some(first) = neg.first_mut() {
                *first = 0x28;
            }
            if let Some(last) = neg.last_mut() {
                *last = 0x29;
            }
            self.tokenized_line.append(&mut neg);
            return lang::WalkerChoice::GotoSibling;
        }

        if kind=="comment_text" {
            let comment = self.text(node);
            self.tokenized_line.extend(comment.bytes().map(|b| b | 0x80));
            return lang::WalkerChoice::GotoSibling;
        }

        if node.named_child_count()==0 {
            let literal = self.text(node).replace(" ","");
            self.tokenized_line.extend_from_slice(literal.as_bytes());
            return lang::WalkerChoice::GotoSibling;
        }

        lang::WalkerChoice::GotoChild
    }
}
impl Tokenizer
{
pub fn new() -> Self
{
Self {
line: String::new(),
tokenized_line: Vec::<u8>::new(),
tokenized_program: Vec::<u8>::new(),
tok_map: HashMap::from(token_maps::TOK_MAP),
detok_map: HashMap::from(token_maps::DETOK_MAP)
}
}
fn text(&self,node: tree_sitter::Node) -> String {
let rng = std::ops::Range {start: node.range().start_point.column, end: node.range().end_point.column};
String::from(&self.line[rng])
}
fn tokenize_line(&mut self,parser: &mut tree_sitter::Parser) -> STDRESULT {
self.tokenized_line = Vec::new();
let tree = parser.parse(&self.line,None).expect("Error parsing file");
self.walk(&tree);
if self.tokenized_line.len()>126 {
error!("integer BASIC line too long");
return Err(Box::new(lang::Error::Syntax));
}
self.tokenized_line.insert(0,self.tokenized_line.len() as u8 +2);
self.tokenized_line.push(1);
Ok(())
}
pub fn tokenize(&mut self,program: String) -> Result<Vec<u8>,DYNERR> {
self.tokenized_program = Vec::new();
let mut parser = tree_sitter::Parser::new();
parser.set_language(tree_sitter_integerbasic::language()).expect("error loading integer grammar");
for line in program.lines() {
if line.len()==0 {
continue;
}
self.line = String::from(line) + "\n";
self.tokenize_line(&mut parser)?;
self.tokenized_program.append(&mut self.tokenized_line);
}
Ok(self.tokenized_program.clone())
}
pub fn detokenize(&self,img: &Vec<u8>) -> Result<String,DYNERR> {
let mut addr = 0;
let mut code = String::new();
while addr < 65536 && addr+2<img.len() {
addr += 1; let line_num: u16 = img[addr] as u16 + img[addr+1] as u16*256;
code += &(u16::to_string(&line_num) + " ");
addr += 2;
for rep in 0..256 {
if rep==255 {
error!("integer BASIC line is too long");
return Err(Box::new(lang::Error::Syntax));
}
if addr >= img.len() {
error!("program ended while processing line");
return Err(Box::new(lang::Error::Detokenization));
}
if img[addr]==1 {
code += "\n";
addr += 1;
break;
}
if img[addr]<128 {
if let Some(tok) = self.detok_map.get(&img[addr]) {
if tok.len()>1 {
code += &(String::from(" ") + &tok.to_uppercase() + " ");
} else {
code += &tok.to_uppercase();
}
addr += 1;
} else {
error!("unrecognized integer BASIC token encountered");
return Err(Box::new(lang::Error::Syntax));
}
} else {
if img[addr]>=176 && img[addr]<=185 {
if addr+2 >= img.len() {
error!("program ended while processing integer");
return Err(Box::new(lang::Error::Detokenization));
}
code += &u16::to_string(&u16::from_le_bytes([img[addr+1],img[addr+2]]));
addr += 3;
} else {
while img[addr]>=128 {
code += &String::from_utf8(vec![img[addr]-128]).expect("expected negative ASCII was not found");
addr += 1;
if addr >= img.len() {
error!("program ended while processing variable name");
return Err(Box::new(lang::Error::Detokenization));
}
}
}
}
}
}
return Ok(code);
}
}