use crate::ast::{
Include, MatchArm, Pattern, Program, SourceLocation, Span, Statement, UnionDef, UnionField,
UnionVariant, WordDef,
};
use crate::types::{Effect, SideEffect, StackType, Type};
#[derive(Debug, Clone)]
pub struct Token {
pub text: String,
pub line: usize,
pub column: usize,
}
impl Token {
fn new(text: String, line: usize, column: usize) -> Self {
Token { text, line, column }
}
}
impl PartialEq<&str> for Token {
fn eq(&self, other: &&str) -> bool {
self.text == *other
}
}
impl PartialEq<str> for Token {
fn eq(&self, other: &str) -> bool {
self.text == other
}
}
pub struct Parser {
tokens: Vec<Token>,
pos: usize,
next_quotation_id: usize,
pending_allowed_lints: Vec<String>,
}
impl Parser {
pub fn new(source: &str) -> Self {
let tokens = tokenize(source);
Parser {
tokens,
pos: 0,
next_quotation_id: 0,
pending_allowed_lints: Vec::new(),
}
}
pub fn parse(&mut self) -> Result<Program, String> {
let mut program = Program::new();
if let Some(error_token) = self.tokens.iter().find(|t| *t == "<<<UNCLOSED_STRING>>>") {
return Err(format!(
"Unclosed string literal at line {}, column {} - missing closing quote",
error_token.line + 1, error_token.column + 1
));
}
while !self.is_at_end() {
self.skip_comments();
if self.is_at_end() {
break;
}
if self.check("include") {
let include = self.parse_include()?;
program.includes.push(include);
continue;
}
if self.check("union") {
let union_def = self.parse_union_def()?;
program.unions.push(union_def);
continue;
}
let word = self.parse_word_def()?;
program.words.push(word);
}
Ok(program)
}
fn parse_include(&mut self) -> Result<Include, String> {
self.consume("include");
let token = self
.advance()
.ok_or("Expected module name after 'include'")?
.clone();
if token == "std" {
if !self.consume(":") {
return Err("Expected ':' after 'std' in include statement".to_string());
}
let name = self
.advance()
.ok_or("Expected module name after 'std:'")?
.clone();
return Ok(Include::Std(name));
}
if token == "ffi" {
if !self.consume(":") {
return Err("Expected ':' after 'ffi' in include statement".to_string());
}
let name = self
.advance()
.ok_or("Expected library name after 'ffi:'")?
.clone();
return Ok(Include::Ffi(name));
}
if token.starts_with('"') && token.ends_with('"') {
let path = token.trim_start_matches('"').trim_end_matches('"');
return Ok(Include::Relative(path.to_string()));
}
Err(format!(
"Invalid include syntax '{}'. Use 'include std:name', 'include ffi:lib', or 'include \"path\"'",
token
))
}
fn parse_union_def(&mut self) -> Result<UnionDef, String> {
let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
self.consume("union");
let name = self
.advance()
.ok_or("Expected union name after 'union'")?
.clone();
if !name
.chars()
.next()
.map(|c| c.is_uppercase())
.unwrap_or(false)
{
return Err(format!(
"Union name '{}' must start with an uppercase letter",
name
));
}
self.skip_comments();
if !self.consume("{") {
return Err(format!(
"Expected '{{' after union name '{}', got '{}'",
name,
self.current()
));
}
let mut variants = Vec::new();
loop {
self.skip_comments();
if self.check("}") {
break;
}
if self.is_at_end() {
return Err(format!("Unexpected end of file in union '{}'", name));
}
variants.push(self.parse_union_variant()?);
}
let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
self.consume("}");
if variants.is_empty() {
return Err(format!("Union '{}' must have at least one variant", name));
}
let mut seen_variants = std::collections::HashSet::new();
for variant in &variants {
if !seen_variants.insert(&variant.name) {
return Err(format!(
"Duplicate variant name '{}' in union '{}'",
variant.name, name
));
}
}
Ok(UnionDef {
name,
variants,
source: Some(SourceLocation::span(
std::path::PathBuf::new(),
start_line,
end_line,
)),
})
}
fn parse_union_variant(&mut self) -> Result<UnionVariant, String> {
let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
let name = self.advance().ok_or("Expected variant name")?.clone();
if !name
.chars()
.next()
.map(|c| c.is_uppercase())
.unwrap_or(false)
{
return Err(format!(
"Variant name '{}' must start with an uppercase letter",
name
));
}
self.skip_comments();
let fields = if self.check("{") {
self.consume("{");
let fields = self.parse_union_fields()?;
if !self.consume("}") {
return Err(format!("Expected '}}' after variant '{}' fields", name));
}
fields
} else {
Vec::new()
};
Ok(UnionVariant {
name,
fields,
source: Some(SourceLocation::new(std::path::PathBuf::new(), start_line)),
})
}
fn parse_union_fields(&mut self) -> Result<Vec<UnionField>, String> {
let mut fields = Vec::new();
loop {
self.skip_comments();
if self.check("}") {
break;
}
let field_name = self.advance().ok_or("Expected field name")?.clone();
if !self.consume(":") {
return Err(format!(
"Expected ':' after field name '{}', got '{}'",
field_name,
self.current()
));
}
let type_name = self
.advance()
.ok_or("Expected type name after ':'")?
.clone();
fields.push(UnionField {
name: field_name,
type_name,
});
self.skip_comments();
self.consume(",");
}
let mut seen_fields = std::collections::HashSet::new();
for field in &fields {
if !seen_fields.insert(&field.name) {
return Err(format!("Duplicate field name '{}' in variant", field.name));
}
}
Ok(fields)
}
fn parse_word_def(&mut self) -> Result<WordDef, String> {
let allowed_lints = std::mem::take(&mut self.pending_allowed_lints);
let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
if !self.consume(":") {
return Err(format!(
"Expected ':' to start word definition, got '{}'",
self.current()
));
}
let name = self
.advance()
.ok_or("Expected word name after ':'")?
.clone();
let effect = if self.check("(") {
Some(self.parse_stack_effect()?)
} else {
None
};
let mut body = Vec::new();
while !self.check(";") {
if self.is_at_end() {
return Err(format!("Unexpected end of file in word '{}'", name));
}
self.skip_comments();
if self.check(";") {
break;
}
body.push(self.parse_statement()?);
}
let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
self.consume(";");
Ok(WordDef {
name,
effect,
body,
source: Some(crate::ast::SourceLocation::span(
std::path::PathBuf::new(),
start_line,
end_line,
)),
allowed_lints,
})
}
fn parse_statement(&mut self) -> Result<Statement, String> {
use crate::ast::Span;
let tok = self.advance_token().ok_or("Unexpected end of file")?;
let token = &tok.text;
let tok_line = tok.line;
let tok_column = tok.column;
let tok_len = tok.text.len();
if let Some(f) = is_float_literal(token)
.then(|| token.parse::<f64>().ok())
.flatten()
{
return Ok(Statement::FloatLiteral(f));
}
if let Some(hex) = token
.strip_prefix("0x")
.or_else(|| token.strip_prefix("0X"))
{
return i64::from_str_radix(hex, 16)
.map(Statement::IntLiteral)
.map_err(|_| format!("Invalid hex literal: {}", token));
}
if let Some(bin) = token
.strip_prefix("0b")
.or_else(|| token.strip_prefix("0B"))
{
return i64::from_str_radix(bin, 2)
.map(Statement::IntLiteral)
.map_err(|_| format!("Invalid binary literal: {}", token));
}
if let Ok(n) = token.parse::<i64>() {
return Ok(Statement::IntLiteral(n));
}
if token == "true" {
return Ok(Statement::BoolLiteral(true));
}
if token == "false" {
return Ok(Statement::BoolLiteral(false));
}
if token == ":" {
let name_tok = self
.advance_token()
.ok_or("Expected symbol name after ':', got end of input")?;
let name = &name_tok.text;
if name.is_empty() {
return Err("Symbol name cannot be empty".to_string());
}
if name.starts_with(|c: char| c.is_ascii_digit()) {
return Err(format!(
"Symbol name cannot start with a digit: ':{}'\n Hint: Symbol names must start with a letter",
name
));
}
if let Some(bad_char) = name.chars().find(|c| {
!c.is_alphanumeric()
&& *c != '-'
&& *c != '_'
&& *c != '.'
&& *c != '?'
&& *c != '!'
}) {
return Err(format!(
"Symbol name contains invalid character '{}': ':{}'\n Hint: Allowed: letters, digits, - _ . ? !",
bad_char, name
));
}
return Ok(Statement::Symbol(name.clone()));
}
if token.starts_with('"') {
if token.len() < 2 || !token.ends_with('"') {
return Err(format!("Malformed string literal: {}", token));
}
let raw = &token[1..token.len() - 1];
let unescaped = unescape_string(raw)?;
return Ok(Statement::StringLiteral(unescaped));
}
if token == "if" {
return self.parse_if(tok_line, tok_column);
}
if token == "[" {
return self.parse_quotation(tok_line, tok_column);
}
if token == "match" {
return self.parse_match(tok_line, tok_column);
}
Ok(Statement::WordCall {
name: token.to_string(),
span: Some(Span::new(tok_line, tok_column, tok_len)),
})
}
fn parse_if(&mut self, start_line: usize, start_column: usize) -> Result<Statement, String> {
let mut then_branch = Vec::new();
loop {
if self.is_at_end() {
return Err("Unexpected end of file in 'if' statement".to_string());
}
self.skip_comments();
if self.check("else") {
self.advance();
break;
}
if self.check("then") {
self.advance();
return Ok(Statement::If {
then_branch,
else_branch: None,
span: Some(Span::new(start_line, start_column, "if".len())),
});
}
then_branch.push(self.parse_statement()?);
}
let mut else_branch = Vec::new();
loop {
if self.is_at_end() {
return Err("Unexpected end of file in 'else' branch".to_string());
}
self.skip_comments();
if self.check("then") {
self.advance();
return Ok(Statement::If {
then_branch,
else_branch: Some(else_branch),
span: Some(Span::new(start_line, start_column, "if".len())),
});
}
else_branch.push(self.parse_statement()?);
}
}
fn parse_quotation(
&mut self,
start_line: usize,
start_column: usize,
) -> Result<Statement, String> {
use crate::ast::QuotationSpan;
let mut body = Vec::new();
loop {
if self.is_at_end() {
return Err("Unexpected end of file in quotation".to_string());
}
self.skip_comments();
if self.check("]") {
let end_tok = self.advance_token().unwrap();
let end_line = end_tok.line;
let end_column = end_tok.column + 1; let id = self.next_quotation_id;
self.next_quotation_id += 1;
let span = QuotationSpan::new(start_line, start_column, end_line, end_column);
return Ok(Statement::Quotation {
id,
body,
span: Some(span),
});
}
body.push(self.parse_statement()?);
}
}
fn parse_match(&mut self, start_line: usize, start_column: usize) -> Result<Statement, String> {
let mut arms = Vec::new();
loop {
self.skip_comments();
if self.check("end") {
self.advance();
break;
}
if self.is_at_end() {
return Err("Unexpected end of file in match expression".to_string());
}
arms.push(self.parse_match_arm()?);
}
if arms.is_empty() {
return Err("Match expression must have at least one arm".to_string());
}
Ok(Statement::Match {
arms,
span: Some(Span::new(start_line, start_column, "match".len())),
})
}
fn parse_match_arm(&mut self) -> Result<MatchArm, String> {
let variant_token = self
.advance_token()
.ok_or("Expected variant name in match arm")?;
let variant_name = variant_token.text.clone();
let arm_line = variant_token.line;
let arm_column = variant_token.column;
let arm_length = variant_name.len();
self.skip_comments();
let pattern = if self.check("{") {
self.consume("{");
let mut bindings = Vec::new();
loop {
self.skip_comments();
if self.check("}") {
break;
}
if self.is_at_end() {
return Err(format!(
"Unexpected end of file in match arm bindings for '{}'",
variant_name
));
}
let token = self.advance().ok_or("Expected binding name")?.clone();
if let Some(field_name) = token.strip_prefix('>') {
if field_name.is_empty() {
return Err(format!(
"Expected field name after '>' in match bindings for '{}'",
variant_name
));
}
bindings.push(field_name.to_string());
} else {
return Err(format!(
"Match bindings must use '>' prefix to indicate stack extraction. \
Use '>{}' instead of '{}' in pattern for '{}'",
token, token, variant_name
));
}
}
self.consume("}");
Pattern::VariantWithBindings {
name: variant_name,
bindings,
}
} else {
Pattern::Variant(variant_name.clone())
};
self.skip_comments();
if !self.consume("->") {
return Err(format!(
"Expected '->' after pattern '{}', got '{}'",
match &pattern {
Pattern::Variant(n) => n.clone(),
Pattern::VariantWithBindings { name, .. } => name.clone(),
},
self.current()
));
}
let mut body = Vec::new();
loop {
self.skip_comments();
if self.check("end") {
break;
}
if let Some(token) = self.current_token()
&& let Some(first_char) = token.text.chars().next()
&& first_char.is_uppercase()
{
if let Some(next) = self.peek_at(1)
&& (next == "->" || next == "{")
{
break;
}
}
if self.is_at_end() {
return Err("Unexpected end of file in match arm body".to_string());
}
body.push(self.parse_statement()?);
}
Ok(MatchArm {
pattern,
body,
span: Some(Span::new(arm_line, arm_column, arm_length)),
})
}
fn parse_stack_effect(&mut self) -> Result<Effect, String> {
if !self.consume("(") {
return Err("Expected '(' to start stack effect".to_string());
}
let (input_row_var, input_types) =
self.parse_type_list_until(&["--", ")"], "stack effect inputs", 0)?;
if !self.consume("--") {
return Err("Expected '--' separator in stack effect".to_string());
}
let (output_row_var, output_types) =
self.parse_type_list_until(&[")", "|"], "stack effect outputs", 0)?;
let effects = if self.consume("|") {
self.parse_effect_annotations()?
} else {
Vec::new()
};
if !self.consume(")") {
return Err("Expected ')' to end stack effect".to_string());
}
let inputs = self.build_stack_type(input_row_var, input_types);
let outputs = self.build_stack_type(output_row_var, output_types);
Ok(Effect::with_effects(inputs, outputs, effects))
}
fn parse_effect_annotations(&mut self) -> Result<Vec<SideEffect>, String> {
let mut effects = Vec::new();
while let Some(token) = self.peek_at(0) {
if token == ")" {
break;
}
match token {
"Yield" => {
self.advance(); if let Some(type_token) = self.current_token() {
if type_token.text == ")" {
return Err("Expected type after 'Yield'".to_string());
}
let type_token = type_token.clone();
self.advance();
let yield_type = self.parse_type(&type_token)?;
effects.push(SideEffect::Yield(Box::new(yield_type)));
} else {
return Err("Expected type after 'Yield'".to_string());
}
}
_ => {
return Err(format!("Unknown effect '{}'. Expected 'Yield'", token));
}
}
}
if effects.is_empty() {
return Err("Expected at least one effect after '|'".to_string());
}
Ok(effects)
}
fn parse_type(&self, token: &Token) -> Result<Type, String> {
match token.text.as_str() {
"Int" => Ok(Type::Int),
"Float" => Ok(Type::Float),
"Bool" => Ok(Type::Bool),
"String" => Ok(Type::String),
"Quotation" => Err(format!(
"'Quotation' is not a valid type at line {}, column {}. Use explicit quotation syntax like [Int -- Int] or [ -- ] instead.",
token.line + 1,
token.column + 1
)),
_ => {
if let Some(first_char) = token.text.chars().next() {
if first_char.is_uppercase() {
Ok(Type::Var(token.text.to_string()))
} else {
Err(format!(
"Unknown type: '{}' at line {}, column {}. Expected Int, Bool, String, Closure, or a type variable (uppercase)",
token.text.escape_default(),
token.line + 1, token.column + 1
))
}
} else {
Err(format!(
"Invalid type: '{}' at line {}, column {}",
token.text.escape_default(),
token.line + 1,
token.column + 1
))
}
}
}
}
fn validate_row_var_name(&self, name: &str) -> Result<(), String> {
if name.is_empty() {
return Err("Row variable must have a name after '..'".to_string());
}
let first_char = name.chars().next().unwrap();
if !first_char.is_ascii_lowercase() {
return Err(format!(
"Row variable '..{}' must start with a lowercase letter (a-z)",
name
));
}
for ch in name.chars() {
if !ch.is_alphanumeric() && ch != '_' {
return Err(format!(
"Row variable '..{}' can only contain letters, numbers, and underscores",
name
));
}
}
match name {
"Int" | "Bool" | "String" => {
return Err(format!(
"Row variable '..{}' cannot use type name as identifier",
name
));
}
_ => {}
}
Ok(())
}
fn parse_type_list_until(
&mut self,
terminators: &[&str],
context: &str,
depth: usize,
) -> Result<(Option<String>, Vec<Type>), String> {
const MAX_QUOTATION_DEPTH: usize = 32;
if depth > MAX_QUOTATION_DEPTH {
return Err(format!(
"Quotation type nesting exceeds maximum depth of {} (possible deeply nested types or DOS attack)",
MAX_QUOTATION_DEPTH
));
}
let mut types = Vec::new();
let mut row_var = None;
while !terminators.iter().any(|t| self.check(t)) {
self.skip_comments();
if terminators.iter().any(|t| self.check(t)) {
break;
}
if self.is_at_end() {
return Err(format!(
"Unexpected end while parsing {} - expected one of: {}",
context,
terminators.join(", ")
));
}
let token = self
.advance_token()
.ok_or_else(|| format!("Unexpected end in {}", context))?
.clone();
if token.text.starts_with("..") {
let var_name = token.text.trim_start_matches("..").to_string();
self.validate_row_var_name(&var_name)?;
row_var = Some(var_name);
} else if token.text == "Closure" {
if !self.consume("[") {
return Err("Expected '[' after 'Closure' in type signature".to_string());
}
let effect_type = self.parse_quotation_type(depth)?;
match effect_type {
Type::Quotation(effect) => {
types.push(Type::Closure {
effect,
captures: Vec::new(), });
}
_ => unreachable!("parse_quotation_type should return Quotation"),
}
} else if token.text == "[" {
types.push(self.parse_quotation_type(depth)?);
} else {
types.push(self.parse_type(&token)?);
}
}
Ok((row_var, types))
}
fn parse_quotation_type(&mut self, depth: usize) -> Result<Type, String> {
let (input_row_var, input_types) =
self.parse_type_list_until(&["--", "]"], "quotation type inputs", depth + 1)?;
if !self.consume("--") {
if self.check("]") {
return Err(
"Quotation types require '--' separator. Did you mean '[Int -- ]' or '[ -- Int]'?"
.to_string(),
);
}
return Err("Expected '--' separator in quotation type".to_string());
}
let (output_row_var, output_types) =
self.parse_type_list_until(&["]"], "quotation type outputs", depth + 1)?;
if !self.consume("]") {
return Err("Expected ']' to end quotation type".to_string());
}
let inputs = self.build_stack_type(input_row_var, input_types);
let outputs = self.build_stack_type(output_row_var, output_types);
Ok(Type::Quotation(Box::new(Effect::new(inputs, outputs))))
}
fn build_stack_type(&self, row_var: Option<String>, types: Vec<Type>) -> StackType {
let base = match row_var {
Some(name) => StackType::RowVar(name),
None => StackType::RowVar("rest".to_string()),
};
types.into_iter().fold(base, |stack, ty| stack.push(ty))
}
fn skip_comments(&mut self) {
loop {
let is_comment = if self.is_at_end() {
false
} else {
let tok = self.current();
tok == "#" || tok.starts_with("#!")
};
if is_comment {
self.advance();
let mut comment_parts: Vec<String> = Vec::new();
while !self.is_at_end() && self.current() != "\n" {
comment_parts.push(self.current().to_string());
self.advance();
}
if !self.is_at_end() {
self.advance(); }
let comment = comment_parts.join("");
if let Some(lint_id) = comment
.strip_prefix("seq:allow(")
.and_then(|s| s.strip_suffix(")"))
{
self.pending_allowed_lints.push(lint_id.to_string());
}
} else if self.check("\n") {
self.advance();
} else {
break;
}
}
}
fn check(&self, expected: &str) -> bool {
if self.is_at_end() {
return false;
}
self.current() == expected
}
fn consume(&mut self, expected: &str) -> bool {
if self.check(expected) {
self.advance();
true
} else {
false
}
}
fn current(&self) -> &str {
if self.is_at_end() {
""
} else {
&self.tokens[self.pos].text
}
}
fn current_token(&self) -> Option<&Token> {
if self.is_at_end() {
None
} else {
Some(&self.tokens[self.pos])
}
}
fn peek_at(&self, n: usize) -> Option<&str> {
let idx = self.pos + n;
if idx < self.tokens.len() {
Some(&self.tokens[idx].text)
} else {
None
}
}
fn advance(&mut self) -> Option<&String> {
if self.is_at_end() {
None
} else {
let token = &self.tokens[self.pos];
self.pos += 1;
Some(&token.text)
}
}
fn advance_token(&mut self) -> Option<&Token> {
if self.is_at_end() {
None
} else {
let token = &self.tokens[self.pos];
self.pos += 1;
Some(token)
}
}
fn is_at_end(&self) -> bool {
self.pos >= self.tokens.len()
}
}
fn is_float_literal(token: &str) -> bool {
let s = token.strip_prefix('-').unwrap_or(token);
if s.is_empty() {
return false;
}
s.contains('.') || s.contains('e') || s.contains('E')
}
fn unescape_string(s: &str) -> Result<String, String> {
let mut result = String::new();
let mut chars = s.chars();
while let Some(ch) = chars.next() {
if ch == '\\' {
match chars.next() {
Some('"') => result.push('"'),
Some('\\') => result.push('\\'),
Some('n') => result.push('\n'),
Some('r') => result.push('\r'),
Some('t') => result.push('\t'),
Some('x') => {
let hex1 = chars.next().ok_or_else(|| {
"Incomplete hex escape sequence '\\x' - expected 2 hex digits".to_string()
})?;
let hex2 = chars.next().ok_or_else(|| {
format!(
"Incomplete hex escape sequence '\\x{}' - expected 2 hex digits",
hex1
)
})?;
let hex_str: String = [hex1, hex2].iter().collect();
let byte_val = u8::from_str_radix(&hex_str, 16).map_err(|_| {
format!(
"Invalid hex escape sequence '\\x{}' - expected 2 hex digits (00-FF)",
hex_str
)
})?;
result.push(byte_val as char);
}
Some(c) => {
return Err(format!(
"Unknown escape sequence '\\{}' in string literal. \
Supported: \\\" \\\\ \\n \\r \\t \\xNN",
c
));
}
None => {
return Err("String ends with incomplete escape sequence '\\'".to_string());
}
}
} else {
result.push(ch);
}
}
Ok(result)
}
fn tokenize(source: &str) -> Vec<Token> {
let mut tokens = Vec::new();
let mut current = String::new();
let mut current_start_line = 0;
let mut current_start_col = 0;
let mut in_string = false;
let mut prev_was_backslash = false;
let mut line = 0;
let mut col = 0;
for ch in source.chars() {
if in_string {
current.push(ch);
if ch == '"' && !prev_was_backslash {
in_string = false;
tokens.push(Token::new(
current.clone(),
current_start_line,
current_start_col,
));
current.clear();
prev_was_backslash = false;
} else if ch == '\\' && !prev_was_backslash {
prev_was_backslash = true;
} else {
prev_was_backslash = false;
}
if ch == '\n' {
line += 1;
col = 0;
} else {
col += 1;
}
} else if ch == '"' {
if !current.is_empty() {
tokens.push(Token::new(
current.clone(),
current_start_line,
current_start_col,
));
current.clear();
}
in_string = true;
current_start_line = line;
current_start_col = col;
current.push(ch);
prev_was_backslash = false;
col += 1;
} else if ch.is_whitespace() {
if !current.is_empty() {
tokens.push(Token::new(
current.clone(),
current_start_line,
current_start_col,
));
current.clear();
}
if ch == '\n' {
tokens.push(Token::new("\n".to_string(), line, col));
line += 1;
col = 0;
} else {
col += 1;
}
} else if "():;[]{},".contains(ch) {
if !current.is_empty() {
tokens.push(Token::new(
current.clone(),
current_start_line,
current_start_col,
));
current.clear();
}
tokens.push(Token::new(ch.to_string(), line, col));
col += 1;
} else {
if current.is_empty() {
current_start_line = line;
current_start_col = col;
}
current.push(ch);
col += 1;
}
}
if in_string {
tokens.push(Token::new(
"<<<UNCLOSED_STRING>>>".to_string(),
current_start_line,
current_start_col,
));
} else if !current.is_empty() {
tokens.push(Token::new(current, current_start_line, current_start_col));
}
tokens
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_hello_world() {
let source = r#"
: main ( -- )
"Hello, World!" write_line ;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
assert_eq!(program.words[0].name, "main");
assert_eq!(program.words[0].body.len(), 2);
match &program.words[0].body[0] {
Statement::StringLiteral(s) => assert_eq!(s, "Hello, World!"),
_ => panic!("Expected StringLiteral"),
}
match &program.words[0].body[1] {
Statement::WordCall { name, .. } => assert_eq!(name, "write_line"),
_ => panic!("Expected WordCall"),
}
}
#[test]
fn test_parse_with_numbers() {
let source = ": add-example ( -- ) 2 3 add ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words[0].body.len(), 3);
assert_eq!(program.words[0].body[0], Statement::IntLiteral(2));
assert_eq!(program.words[0].body[1], Statement::IntLiteral(3));
assert!(matches!(
&program.words[0].body[2],
Statement::WordCall { name, .. } if name == "add"
));
}
#[test]
fn test_parse_hex_literals() {
let source = ": test ( -- ) 0xFF 0x10 0X1A ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words[0].body[0], Statement::IntLiteral(255));
assert_eq!(program.words[0].body[1], Statement::IntLiteral(16));
assert_eq!(program.words[0].body[2], Statement::IntLiteral(26));
}
#[test]
fn test_parse_binary_literals() {
let source = ": test ( -- ) 0b1010 0B1111 0b0 ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words[0].body[0], Statement::IntLiteral(10));
assert_eq!(program.words[0].body[1], Statement::IntLiteral(15));
assert_eq!(program.words[0].body[2], Statement::IntLiteral(0));
}
#[test]
fn test_parse_invalid_hex_literal() {
let source = ": test ( -- ) 0xGG ;";
let mut parser = Parser::new(source);
let err = parser.parse().unwrap_err();
assert!(err.contains("Invalid hex literal"));
}
#[test]
fn test_parse_invalid_binary_literal() {
let source = ": test ( -- ) 0b123 ;";
let mut parser = Parser::new(source);
let err = parser.parse().unwrap_err();
assert!(err.contains("Invalid binary literal"));
}
#[test]
fn test_parse_escaped_quotes() {
let source = r#": main ( -- ) "Say \"hello\" there" write_line ;"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
assert_eq!(program.words[0].body.len(), 2);
match &program.words[0].body[0] {
Statement::StringLiteral(s) => assert_eq!(s, "Say \"hello\" there"),
_ => panic!("Expected StringLiteral with escaped quotes"),
}
}
#[test]
fn test_escaped_quote_at_end_of_string() {
let source = r#": main ( -- ) "hello\"" io.write-line ;"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
match &program.words[0].body[0] {
Statement::StringLiteral(s) => assert_eq!(s, "hello\""),
_ => panic!("Expected StringLiteral ending with escaped quote"),
}
}
#[test]
fn test_escaped_quote_at_start_of_string() {
let source = r#": main ( -- ) "\"hello" io.write-line ;"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
match &program.words[0].body[0] {
Statement::StringLiteral(s) => assert_eq!(s, "\"hello"),
_ => panic!("Expected StringLiteral starting with escaped quote"),
}
}
#[test]
fn test_escape_sequences() {
let source = r#": main ( -- ) "Line 1\nLine 2\tTabbed" write_line ;"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
match &program.words[0].body[0] {
Statement::StringLiteral(s) => assert_eq!(s, "Line 1\nLine 2\tTabbed"),
_ => panic!("Expected StringLiteral"),
}
}
#[test]
fn test_unknown_escape_sequence() {
let source = r#": main ( -- ) "Bad \q sequence" write_line ;"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
assert!(result.unwrap_err().contains("Unknown escape sequence"));
}
#[test]
fn test_hex_escape_sequence() {
let source = r#": main ( -- ) "\x1b[2K\x41" io.write-line ;"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
match &program.words[0].body[0] {
Statement::StringLiteral(s) => {
assert_eq!(s.len(), 5); assert_eq!(s.as_bytes()[0], 0x1b); assert_eq!(s.as_bytes()[4], 0x41); }
_ => panic!("Expected StringLiteral"),
}
}
#[test]
fn test_hex_escape_null_byte() {
let source = r#": main ( -- ) "before\x00after" io.write-line ;"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
match &program.words[0].body[0] {
Statement::StringLiteral(s) => {
assert_eq!(s.len(), 12); assert_eq!(s.as_bytes()[6], 0x00);
}
_ => panic!("Expected StringLiteral"),
}
}
#[test]
fn test_hex_escape_uppercase() {
let source = r#": main ( -- ) "\x41\x42\x4F" io.write-line ;"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
match &program.words[0].body[0] {
Statement::StringLiteral(s) => {
assert_eq!(s, "ABO"); }
_ => panic!("Expected StringLiteral"),
}
}
#[test]
fn test_hex_escape_high_bytes() {
let source = r#": main ( -- ) "\xFF" io.write-line ;"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
match &program.words[0].body[0] {
Statement::StringLiteral(s) => {
assert_eq!(s, "\u{00FF}");
assert_eq!(s.chars().next().unwrap(), 'ÿ');
}
_ => panic!("Expected StringLiteral"),
}
}
#[test]
fn test_hex_escape_incomplete() {
let source = r#": main ( -- ) "\x1" io.write-line ;"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
assert!(result.unwrap_err().contains("Incomplete hex escape"));
}
#[test]
fn test_hex_escape_invalid_digits() {
let source = r#": main ( -- ) "\xGG" io.write-line ;"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
assert!(result.unwrap_err().contains("Invalid hex escape"));
}
#[test]
fn test_hex_escape_at_end_of_string() {
let source = r#": main ( -- ) "test\x" io.write-line ;"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
assert!(result.unwrap_err().contains("Incomplete hex escape"));
}
#[test]
fn test_unclosed_string_literal() {
let source = r#": main ( -- ) "unclosed string ;"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err_msg = result.unwrap_err();
assert!(err_msg.contains("Unclosed string literal"));
assert!(
err_msg.contains("line 1"),
"Expected line number in error: {}",
err_msg
);
assert!(
err_msg.contains("column 15"),
"Expected column number in error: {}",
err_msg
);
}
#[test]
fn test_multiple_word_definitions() {
let source = r#"
: double ( Int -- Int )
2 multiply ;
: quadruple ( Int -- Int )
double double ;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 2);
assert_eq!(program.words[0].name, "double");
assert_eq!(program.words[1].name, "quadruple");
assert!(program.words[0].effect.is_some());
assert!(program.words[1].effect.is_some());
}
#[test]
fn test_user_word_calling_user_word() {
let source = r#"
: helper ( -- )
"helper called" write_line ;
: main ( -- )
helper ;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 2);
match &program.words[1].body[0] {
Statement::WordCall { name, .. } => assert_eq!(name, "helper"),
_ => panic!("Expected WordCall to helper"),
}
}
#[test]
fn test_parse_simple_stack_effect() {
let source = ": test ( Int -- Bool ) 1 ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
let word = &program.words[0];
assert!(word.effect.is_some());
let effect = word.effect.as_ref().unwrap();
assert_eq!(
effect.inputs,
StackType::Cons {
rest: Box::new(StackType::RowVar("rest".to_string())),
top: Type::Int
}
);
assert_eq!(
effect.outputs,
StackType::Cons {
rest: Box::new(StackType::RowVar("rest".to_string())),
top: Type::Bool
}
);
}
#[test]
fn test_parse_row_polymorphic_stack_effect() {
let source = ": test ( ..a Int -- ..a Bool ) 1 ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
let word = &program.words[0];
assert!(word.effect.is_some());
let effect = word.effect.as_ref().unwrap();
assert_eq!(
effect.inputs,
StackType::Cons {
rest: Box::new(StackType::RowVar("a".to_string())),
top: Type::Int
}
);
assert_eq!(
effect.outputs,
StackType::Cons {
rest: Box::new(StackType::RowVar("a".to_string())),
top: Type::Bool
}
);
}
#[test]
fn test_parse_invalid_row_var_starts_with_digit() {
let source = ": test ( ..123 Int -- ) ;";
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err_msg = result.unwrap_err();
assert!(
err_msg.contains("lowercase letter"),
"Expected error about lowercase letter, got: {}",
err_msg
);
}
#[test]
fn test_parse_invalid_row_var_starts_with_uppercase() {
let source = ": test ( ..Int Int -- ) ;";
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err_msg = result.unwrap_err();
assert!(
err_msg.contains("lowercase letter") || err_msg.contains("type name"),
"Expected error about lowercase letter or type name, got: {}",
err_msg
);
}
#[test]
fn test_parse_invalid_row_var_with_special_chars() {
let source = ": test ( ..a-b Int -- ) ;";
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err_msg = result.unwrap_err();
assert!(
err_msg.contains("letters, numbers, and underscores")
|| err_msg.contains("Unknown type"),
"Expected error about valid characters, got: {}",
err_msg
);
}
#[test]
fn test_parse_valid_row_var_with_underscore() {
let source = ": test ( ..my_row Int -- ..my_row Bool ) ;";
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_ok(), "Should accept row variable with underscore");
}
#[test]
fn test_parse_multiple_types_stack_effect() {
let source = ": test ( Int String -- Bool ) 1 ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
let effect = program.words[0].effect.as_ref().unwrap();
let (rest, top) = effect.inputs.clone().pop().unwrap();
assert_eq!(top, Type::String);
let (rest2, top2) = rest.pop().unwrap();
assert_eq!(top2, Type::Int);
assert_eq!(rest2, StackType::RowVar("rest".to_string()));
assert_eq!(
effect.outputs,
StackType::Cons {
rest: Box::new(StackType::RowVar("rest".to_string())),
top: Type::Bool
}
);
}
#[test]
fn test_parse_type_variable() {
let source = ": dup ( ..a T -- ..a T T ) ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
let effect = program.words[0].effect.as_ref().unwrap();
assert_eq!(
effect.inputs,
StackType::Cons {
rest: Box::new(StackType::RowVar("a".to_string())),
top: Type::Var("T".to_string())
}
);
let (rest, top) = effect.outputs.clone().pop().unwrap();
assert_eq!(top, Type::Var("T".to_string()));
let (rest2, top2) = rest.pop().unwrap();
assert_eq!(top2, Type::Var("T".to_string()));
assert_eq!(rest2, StackType::RowVar("a".to_string()));
}
#[test]
fn test_parse_empty_stack_effect() {
let source = ": test ( -- ) ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
let effect = program.words[0].effect.as_ref().unwrap();
assert_eq!(effect.inputs, StackType::RowVar("rest".to_string()));
assert_eq!(effect.outputs, StackType::RowVar("rest".to_string()));
}
#[test]
fn test_parse_invalid_type() {
let source = ": test ( invalid -- Bool ) ;";
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
assert!(result.unwrap_err().contains("Unknown type"));
}
#[test]
fn test_parse_unclosed_stack_effect() {
let source = ": test ( Int -- Bool body ;";
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err_msg = result.unwrap_err();
assert!(err_msg.contains("Unknown type"));
}
#[test]
fn test_parse_simple_quotation_type() {
let source = ": apply ( [Int -- Int] -- ) ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
let effect = program.words[0].effect.as_ref().unwrap();
let (rest, top) = effect.inputs.clone().pop().unwrap();
match top {
Type::Quotation(quot_effect) => {
assert_eq!(
quot_effect.inputs,
StackType::Cons {
rest: Box::new(StackType::RowVar("rest".to_string())),
top: Type::Int
}
);
assert_eq!(
quot_effect.outputs,
StackType::Cons {
rest: Box::new(StackType::RowVar("rest".to_string())),
top: Type::Int
}
);
}
_ => panic!("Expected Quotation type, got {:?}", top),
}
assert_eq!(rest, StackType::RowVar("rest".to_string()));
}
#[test]
fn test_parse_quotation_type_with_row_vars() {
let source = ": test ( ..a [..a T -- ..a Bool] -- ..a ) ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
let effect = program.words[0].effect.as_ref().unwrap();
let (rest, top) = effect.inputs.clone().pop().unwrap();
match top {
Type::Quotation(quot_effect) => {
let (q_in_rest, q_in_top) = quot_effect.inputs.clone().pop().unwrap();
assert_eq!(q_in_top, Type::Var("T".to_string()));
assert_eq!(q_in_rest, StackType::RowVar("a".to_string()));
let (q_out_rest, q_out_top) = quot_effect.outputs.clone().pop().unwrap();
assert_eq!(q_out_top, Type::Bool);
assert_eq!(q_out_rest, StackType::RowVar("a".to_string()));
}
_ => panic!("Expected Quotation type, got {:?}", top),
}
assert_eq!(rest, StackType::RowVar("a".to_string()));
}
#[test]
fn test_parse_nested_quotation_type() {
let source = ": nested ( [[Int -- Int] -- Bool] -- ) ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
let effect = program.words[0].effect.as_ref().unwrap();
let (_, top) = effect.inputs.clone().pop().unwrap();
match top {
Type::Quotation(outer_effect) => {
let (_, outer_in_top) = outer_effect.inputs.clone().pop().unwrap();
match outer_in_top {
Type::Quotation(inner_effect) => {
assert!(matches!(
inner_effect.inputs.clone().pop().unwrap().1,
Type::Int
));
assert!(matches!(
inner_effect.outputs.clone().pop().unwrap().1,
Type::Int
));
}
_ => panic!("Expected nested Quotation type"),
}
let (_, outer_out_top) = outer_effect.outputs.clone().pop().unwrap();
assert_eq!(outer_out_top, Type::Bool);
}
_ => panic!("Expected Quotation type"),
}
}
#[test]
fn test_parse_deeply_nested_quotation_type_exceeds_limit() {
let mut source = String::from(": deep ( ");
for _ in 0..35 {
source.push_str("[ -- ");
}
source.push_str("Int");
for _ in 0..35 {
source.push_str(" ]");
}
source.push_str(" -- ) ;");
let mut parser = Parser::new(&source);
let result = parser.parse();
assert!(result.is_err());
let err_msg = result.unwrap_err();
assert!(
err_msg.contains("depth") || err_msg.contains("32"),
"Expected depth limit error, got: {}",
err_msg
);
}
#[test]
fn test_parse_empty_quotation_type() {
let source = ": empty-quot ( [ -- ] -- ) ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
let effect = program.words[0].effect.as_ref().unwrap();
let (_, top) = effect.inputs.clone().pop().unwrap();
match top {
Type::Quotation(quot_effect) => {
assert_eq!(quot_effect.inputs, StackType::RowVar("rest".to_string()));
assert_eq!(quot_effect.outputs, StackType::RowVar("rest".to_string()));
}
_ => panic!("Expected Quotation type"),
}
}
#[test]
fn test_parse_quotation_type_in_output() {
let source = ": maker ( -- [Int -- Int] ) ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
let effect = program.words[0].effect.as_ref().unwrap();
let (_, top) = effect.outputs.clone().pop().unwrap();
match top {
Type::Quotation(quot_effect) => {
assert!(matches!(
quot_effect.inputs.clone().pop().unwrap().1,
Type::Int
));
assert!(matches!(
quot_effect.outputs.clone().pop().unwrap().1,
Type::Int
));
}
_ => panic!("Expected Quotation type"),
}
}
#[test]
fn test_parse_unclosed_quotation_type() {
let source = ": broken ( [Int -- Int -- ) ;";
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err_msg = result.unwrap_err();
assert!(
err_msg.contains("Unclosed")
|| err_msg.contains("Expected")
|| err_msg.contains("Unexpected"),
"Got error: {}",
err_msg
);
}
#[test]
fn test_parse_multiple_quotation_types() {
let source = ": multi ( [Int -- Int] [String -- Bool] -- ) ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
let effect = program.words[0].effect.as_ref().unwrap();
let (rest, top) = effect.inputs.clone().pop().unwrap();
match top {
Type::Quotation(quot_effect) => {
assert!(matches!(
quot_effect.inputs.clone().pop().unwrap().1,
Type::String
));
assert!(matches!(
quot_effect.outputs.clone().pop().unwrap().1,
Type::Bool
));
}
_ => panic!("Expected Quotation type"),
}
let (_, top2) = rest.pop().unwrap();
match top2 {
Type::Quotation(quot_effect) => {
assert!(matches!(
quot_effect.inputs.clone().pop().unwrap().1,
Type::Int
));
assert!(matches!(
quot_effect.outputs.clone().pop().unwrap().1,
Type::Int
));
}
_ => panic!("Expected Quotation type"),
}
}
#[test]
fn test_parse_quotation_type_without_separator() {
let source = ": consumer ( [Int] -- ) ;";
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err_msg = result.unwrap_err();
assert!(
err_msg.contains("require") && err_msg.contains("--"),
"Expected error about missing '--' separator, got: {}",
err_msg
);
}
#[test]
fn test_parse_bare_quotation_type_rejected() {
let source = ": apply-twice ( Int Quotation -- Int ) ;";
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err_msg = result.unwrap_err();
assert!(
err_msg.contains("Quotation") && err_msg.contains("not a valid type"),
"Expected error about 'Quotation' not being valid, got: {}",
err_msg
);
assert!(
err_msg.contains("[Int -- Int]") || err_msg.contains("[ -- ]"),
"Expected error to suggest explicit syntax, got: {}",
err_msg
);
}
#[test]
fn test_parse_no_stack_effect() {
let source = ": test 1 2 add ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
assert!(program.words[0].effect.is_none());
}
#[test]
fn test_parse_simple_quotation() {
let source = r#"
: test ( -- Quot )
[ 1 add ] ;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
assert_eq!(program.words[0].name, "test");
assert_eq!(program.words[0].body.len(), 1);
match &program.words[0].body[0] {
Statement::Quotation { body, .. } => {
assert_eq!(body.len(), 2);
assert_eq!(body[0], Statement::IntLiteral(1));
assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "add"));
}
_ => panic!("Expected Quotation statement"),
}
}
#[test]
fn test_parse_empty_quotation() {
let source = ": test [ ] ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
match &program.words[0].body[0] {
Statement::Quotation { body, .. } => {
assert_eq!(body.len(), 0);
}
_ => panic!("Expected Quotation statement"),
}
}
#[test]
fn test_parse_quotation_with_call() {
let source = r#"
: test ( -- )
5 [ 1 add ] call ;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
assert_eq!(program.words[0].body.len(), 3);
assert_eq!(program.words[0].body[0], Statement::IntLiteral(5));
match &program.words[0].body[1] {
Statement::Quotation { body, .. } => {
assert_eq!(body.len(), 2);
}
_ => panic!("Expected Quotation"),
}
assert!(matches!(
&program.words[0].body[2],
Statement::WordCall { name, .. } if name == "call"
));
}
#[test]
fn test_parse_nested_quotation() {
let source = ": test [ [ 1 add ] call ] ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
match &program.words[0].body[0] {
Statement::Quotation {
body: outer_body, ..
} => {
assert_eq!(outer_body.len(), 2);
match &outer_body[0] {
Statement::Quotation {
body: inner_body, ..
} => {
assert_eq!(inner_body.len(), 2);
assert_eq!(inner_body[0], Statement::IntLiteral(1));
assert!(
matches!(&inner_body[1], Statement::WordCall { name, .. } if name == "add")
);
}
_ => panic!("Expected nested Quotation"),
}
assert!(
matches!(&outer_body[1], Statement::WordCall { name, .. } if name == "call")
);
}
_ => panic!("Expected Quotation"),
}
}
#[test]
fn test_parse_while_with_quotations() {
let source = r#"
: countdown ( Int -- )
[ dup 0 > ] [ 1 subtract ] while drop ;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
assert_eq!(program.words[0].body.len(), 4);
match &program.words[0].body[0] {
Statement::Quotation { body: pred, .. } => {
assert_eq!(pred.len(), 3);
assert!(matches!(&pred[0], Statement::WordCall { name, .. } if name == "dup"));
assert_eq!(pred[1], Statement::IntLiteral(0));
assert!(matches!(&pred[2], Statement::WordCall { name, .. } if name == ">"));
}
_ => panic!("Expected predicate quotation"),
}
match &program.words[0].body[1] {
Statement::Quotation { body, .. } => {
assert_eq!(body.len(), 2);
assert_eq!(body[0], Statement::IntLiteral(1));
assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "subtract"));
}
_ => panic!("Expected body quotation"),
}
assert!(matches!(
&program.words[0].body[2],
Statement::WordCall { name, .. } if name == "while"
));
assert!(matches!(
&program.words[0].body[3],
Statement::WordCall { name, .. } if name == "drop"
));
}
#[test]
fn test_parse_simple_closure_type() {
let source = ": make-adder ( Int -- Closure[Int -- Int] ) ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
let word = &program.words[0];
assert!(word.effect.is_some());
let effect = word.effect.as_ref().unwrap();
let (input_rest, input_top) = effect.inputs.clone().pop().unwrap();
assert_eq!(input_top, Type::Int);
assert_eq!(input_rest, StackType::RowVar("rest".to_string()));
let (output_rest, output_top) = effect.outputs.clone().pop().unwrap();
match output_top {
Type::Closure { effect, captures } => {
assert_eq!(
effect.inputs,
StackType::Cons {
rest: Box::new(StackType::RowVar("rest".to_string())),
top: Type::Int
}
);
assert_eq!(
effect.outputs,
StackType::Cons {
rest: Box::new(StackType::RowVar("rest".to_string())),
top: Type::Int
}
);
assert_eq!(captures.len(), 0);
}
_ => panic!("Expected Closure type, got {:?}", output_top),
}
assert_eq!(output_rest, StackType::RowVar("rest".to_string()));
}
#[test]
fn test_parse_closure_type_with_row_vars() {
let source = ": make-handler ( ..a Config -- ..a Closure[Request -- Response] ) ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
let effect = program.words[0].effect.as_ref().unwrap();
let (rest, top) = effect.outputs.clone().pop().unwrap();
match top {
Type::Closure { effect, .. } => {
let (_, in_top) = effect.inputs.clone().pop().unwrap();
assert_eq!(in_top, Type::Var("Request".to_string()));
let (_, out_top) = effect.outputs.clone().pop().unwrap();
assert_eq!(out_top, Type::Var("Response".to_string()));
}
_ => panic!("Expected Closure type"),
}
assert_eq!(rest, StackType::RowVar("a".to_string()));
}
#[test]
fn test_parse_closure_type_missing_bracket() {
let source = ": broken ( Int -- Closure ) ;";
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err_msg = result.unwrap_err();
assert!(
err_msg.contains("[") && err_msg.contains("Closure"),
"Expected error about missing '[' after Closure, got: {}",
err_msg
);
}
#[test]
fn test_parse_closure_type_in_input() {
let source = ": apply-closure ( Closure[Int -- Int] -- ) ;";
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
let effect = program.words[0].effect.as_ref().unwrap();
let (_, top) = effect.inputs.clone().pop().unwrap();
match top {
Type::Closure { effect, .. } => {
assert!(matches!(effect.inputs.clone().pop().unwrap().1, Type::Int));
assert!(matches!(effect.outputs.clone().pop().unwrap().1, Type::Int));
}
_ => panic!("Expected Closure type in input"),
}
}
#[test]
fn test_token_position_single_line() {
let source = ": main ( -- ) ;";
let tokens = tokenize(source);
assert_eq!(tokens[0].text, ":");
assert_eq!(tokens[0].line, 0);
assert_eq!(tokens[0].column, 0);
assert_eq!(tokens[1].text, "main");
assert_eq!(tokens[1].line, 0);
assert_eq!(tokens[1].column, 2);
assert_eq!(tokens[2].text, "(");
assert_eq!(tokens[2].line, 0);
assert_eq!(tokens[2].column, 7);
}
#[test]
fn test_token_position_multiline() {
let source = ": main ( -- )\n 42\n;";
let tokens = tokenize(source);
let token_42 = tokens.iter().find(|t| t.text == "42").unwrap();
assert_eq!(token_42.line, 1);
assert_eq!(token_42.column, 2);
let token_semi = tokens.iter().find(|t| t.text == ";").unwrap();
assert_eq!(token_semi.line, 2);
assert_eq!(token_semi.column, 0);
}
#[test]
fn test_word_def_source_location_span() {
let source = r#": helper ( -- )
"hello"
write_line
;
: main ( -- )
helper
;"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 2);
let helper = &program.words[0];
assert_eq!(helper.name, "helper");
let helper_source = helper.source.as_ref().unwrap();
assert_eq!(helper_source.start_line, 0);
assert_eq!(helper_source.end_line, 3);
let main_word = &program.words[1];
assert_eq!(main_word.name, "main");
let main_source = main_word.source.as_ref().unwrap();
assert_eq!(main_source.start_line, 5);
assert_eq!(main_source.end_line, 7);
}
#[test]
fn test_token_position_string_with_newline() {
let source = "\"line1\\nline2\"";
let tokens = tokenize(source);
assert_eq!(tokens.len(), 1);
assert_eq!(tokens[0].line, 0);
assert_eq!(tokens[0].column, 0);
}
#[test]
fn test_parse_simple_union() {
let source = r#"
union Message {
Get { response-chan: Int }
Set { value: Int }
}
: main ( -- ) ;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.unions.len(), 1);
let union_def = &program.unions[0];
assert_eq!(union_def.name, "Message");
assert_eq!(union_def.variants.len(), 2);
assert_eq!(union_def.variants[0].name, "Get");
assert_eq!(union_def.variants[0].fields.len(), 1);
assert_eq!(union_def.variants[0].fields[0].name, "response-chan");
assert_eq!(union_def.variants[0].fields[0].type_name, "Int");
assert_eq!(union_def.variants[1].name, "Set");
assert_eq!(union_def.variants[1].fields.len(), 1);
assert_eq!(union_def.variants[1].fields[0].name, "value");
assert_eq!(union_def.variants[1].fields[0].type_name, "Int");
}
#[test]
fn test_parse_union_with_multiple_fields() {
let source = r#"
union Report {
Data { op: Int, delta: Int, total: Int }
Empty
}
: main ( -- ) ;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.unions.len(), 1);
let union_def = &program.unions[0];
assert_eq!(union_def.name, "Report");
assert_eq!(union_def.variants.len(), 2);
let data_variant = &union_def.variants[0];
assert_eq!(data_variant.name, "Data");
assert_eq!(data_variant.fields.len(), 3);
assert_eq!(data_variant.fields[0].name, "op");
assert_eq!(data_variant.fields[1].name, "delta");
assert_eq!(data_variant.fields[2].name, "total");
let empty_variant = &union_def.variants[1];
assert_eq!(empty_variant.name, "Empty");
assert_eq!(empty_variant.fields.len(), 0);
}
#[test]
fn test_parse_union_lowercase_name_error() {
let source = r#"
union message {
Get { }
}
"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
assert!(result.unwrap_err().contains("uppercase"));
}
#[test]
fn test_parse_union_empty_error() {
let source = r#"
union Message {
}
"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
assert!(result.unwrap_err().contains("at least one variant"));
}
#[test]
fn test_parse_union_duplicate_variant_error() {
let source = r#"
union Message {
Get { x: Int }
Get { y: String }
}
"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err = result.unwrap_err();
assert!(err.contains("Duplicate variant name"));
assert!(err.contains("Get"));
}
#[test]
fn test_parse_union_duplicate_field_error() {
let source = r#"
union Data {
Record { x: Int, x: String }
}
"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err = result.unwrap_err();
assert!(err.contains("Duplicate field name"));
assert!(err.contains("x"));
}
#[test]
fn test_parse_simple_match() {
let source = r#"
: handle ( -- )
match
Get -> send-response
Set -> process-set
end
;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
assert_eq!(program.words[0].body.len(), 1);
match &program.words[0].body[0] {
Statement::Match { arms, span: _ } => {
assert_eq!(arms.len(), 2);
match &arms[0].pattern {
Pattern::Variant(name) => assert_eq!(name, "Get"),
_ => panic!("Expected Variant pattern"),
}
assert_eq!(arms[0].body.len(), 1);
match &arms[1].pattern {
Pattern::Variant(name) => assert_eq!(name, "Set"),
_ => panic!("Expected Variant pattern"),
}
assert_eq!(arms[1].body.len(), 1);
}
_ => panic!("Expected Match statement"),
}
}
#[test]
fn test_parse_match_with_bindings() {
let source = r#"
: handle ( -- )
match
Get { >chan } -> chan send-response
Report { >delta >total } -> delta total process
end
;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
match &program.words[0].body[0] {
Statement::Match { arms, span: _ } => {
assert_eq!(arms.len(), 2);
match &arms[0].pattern {
Pattern::VariantWithBindings { name, bindings } => {
assert_eq!(name, "Get");
assert_eq!(bindings.len(), 1);
assert_eq!(bindings[0], "chan");
}
_ => panic!("Expected VariantWithBindings pattern"),
}
match &arms[1].pattern {
Pattern::VariantWithBindings { name, bindings } => {
assert_eq!(name, "Report");
assert_eq!(bindings.len(), 2);
assert_eq!(bindings[0], "delta");
assert_eq!(bindings[1], "total");
}
_ => panic!("Expected VariantWithBindings pattern"),
}
}
_ => panic!("Expected Match statement"),
}
}
#[test]
fn test_parse_match_bindings_require_prefix() {
let source = r#"
: handle ( -- )
match
Get { chan } -> chan send-response
end
;
"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
let err = result.unwrap_err();
assert!(err.contains(">chan"));
assert!(err.contains("stack extraction"));
}
#[test]
fn test_parse_match_with_body_statements() {
let source = r#"
: handle ( -- )
match
Get -> 1 2 add send-response
Set -> process-value store
end
;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
match &program.words[0].body[0] {
Statement::Match { arms, span: _ } => {
assert_eq!(arms[0].body.len(), 4);
assert_eq!(arms[0].body[0], Statement::IntLiteral(1));
assert_eq!(arms[0].body[1], Statement::IntLiteral(2));
assert!(
matches!(&arms[0].body[2], Statement::WordCall { name, .. } if name == "add")
);
assert_eq!(arms[1].body.len(), 2);
}
_ => panic!("Expected Match statement"),
}
}
#[test]
fn test_parse_match_empty_error() {
let source = r#"
: handle ( -- )
match
end
;
"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
assert!(result.unwrap_err().contains("at least one arm"));
}
#[test]
fn test_parse_symbol_literal() {
let source = r#"
: main ( -- )
:hello drop
;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
assert_eq!(program.words.len(), 1);
let main = &program.words[0];
assert_eq!(main.body.len(), 2);
match &main.body[0] {
Statement::Symbol(name) => assert_eq!(name, "hello"),
_ => panic!("Expected Symbol statement, got {:?}", main.body[0]),
}
}
#[test]
fn test_parse_symbol_with_hyphen() {
let source = r#"
: main ( -- )
:hello-world drop
;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
match &program.words[0].body[0] {
Statement::Symbol(name) => assert_eq!(name, "hello-world"),
_ => panic!("Expected Symbol statement"),
}
}
#[test]
fn test_parse_symbol_starting_with_digit_fails() {
let source = r#"
: main ( -- )
:123abc drop
;
"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
assert!(result.unwrap_err().contains("cannot start with a digit"));
}
#[test]
fn test_parse_symbol_with_invalid_char_fails() {
let source = r#"
: main ( -- )
:hello@world drop
;
"#;
let mut parser = Parser::new(source);
let result = parser.parse();
assert!(result.is_err());
assert!(result.unwrap_err().contains("invalid character"));
}
#[test]
fn test_parse_symbol_special_chars_allowed() {
let source = r#"
: main ( -- )
:empty? drop
:save! drop
;
"#;
let mut parser = Parser::new(source);
let program = parser.parse().unwrap();
match &program.words[0].body[0] {
Statement::Symbol(name) => assert_eq!(name, "empty?"),
_ => panic!("Expected Symbol statement"),
}
match &program.words[0].body[2] {
Statement::Symbol(name) => assert_eq!(name, "save!"),
_ => panic!("Expected Symbol statement"),
}
}
}