use logos::Logos;
/// Lexical tokens produced by the `logos`-derived lexer.
///
/// Runs of spaces/tabs are skipped, as are `--` comments up to (but not
/// including) the next newline; a newline itself is reported as
/// [`Token::Newline`].
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t]+")]
#[logos(skip(r"--[^\n]*", allow_greedy = true))]
pub enum Token {
// ---- Lowercase keywords with a fixed spelling ----
#[token("type")]
Type,
#[token("tool")]
Tool,
#[token("use")]
Use,
#[token("with")]
With,
#[token("timeout")]
Timeout,
#[token("retry")]
Retry,
// ---- Single uppercase letters naming types ----
// `is_type_sigil` covers L, R, F, O, M and S; `W` is lexed here but is not
// part of that set.
#[token("L")]
ListType,
#[token("R")]
ResultType,
#[token("F")]
FnType,
#[token("O")]
OptType,
#[token("M")]
MapType,
#[token("S")]
SumType,
#[token("W")]
WorldType,
#[token("by")]
By,
// ---- Fixed-width integer type names ----
#[token("U32")]
U32Type,
#[token("U64")]
U64Type,
#[token("I64")]
I64Type,
// ---- Further keywords (variants prefixed `Kw`) ----
#[token("if")]
KwIf,
#[token("return")]
KwReturn,
#[token("let")]
KwLet,
#[token("fn")]
KwFn,
#[token("def")]
KwDef,
#[token("var")]
KwVar,
#[token("const")]
KwConst,
// ---- Literal keywords ----
#[token("true")]
True,
#[token("false")]
False,
#[token("nil")]
Nil,
// ---- Two-character operators ----
#[token(">=")]
GreaterEq,
#[token("<=")]
LessEq,
#[token("!=")]
NotEq,
#[token("+=")]
PlusEq,
#[token(">>")]
PipeOp,
#[token("??")]
NilCoalesce,
#[token("!!")]
BangBang,
// ---- Single-character operators ----
#[token("+")]
Plus,
#[token("-")]
Minus,
#[token("*")]
Star,
#[token("/")]
Slash,
#[token(">")]
Greater,
#[token("<")]
Less,
// Both `=` and `==` produce the same `Eq` token.
#[token("=")]
#[token("==")]
Eq,
#[token("&")]
Amp,
#[token("|")]
Pipe,
#[token("?")]
Question,
#[token("@")]
At,
#[token("!")]
Bang,
#[token("^")]
Caret,
#[token("~")]
Tilde,
#[token("$")]
Dollar,
// ---- Punctuation and delimiters ----
#[token(":")]
Colon,
#[token(";")]
Semi,
#[token("..")]
DotDot,
#[token(".?")]
DotQuestion,
#[token(".")]
Dot,
#[token(",")]
Comma,
#[token("{")]
LBrace,
#[token("}")]
RBrace,
#[token("(")]
LParen,
#[token(")")]
RParen,
#[token("[")]
LBracket,
#[token("]")]
RBracket,
#[token("_")]
Underscore,
// ---- Numeric literals (all carried as `f64`) ----
// Decimal form: optional leading `-`, digits, optional `.digits` fraction and
// optional exponent. The callback yields `None` when the slice does not parse
// as an `f64`.
#[regex(r"-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?", |lex| lex.slice().parse::<f64>().ok())]
// Hexadecimal form (`0x`/`0X`): the digits after the two-character prefix are
// parsed as a `u64` and then converted to `f64`.
#[regex(r"0[xX][0-9a-fA-F]+", |lex| {
let s = lex.slice();
u64::from_str_radix(&s[2..], 16).ok().map(|n| n as f64)
})]
// Binary form (`0b`/`0B`), handled like the hexadecimal form with radix 2.
#[regex(r"0[bB][01]+", |lex| {
let s = lex.slice();
u64::from_str_radix(&s[2..], 2).ok().map(|n| n as f64)
})]
// Octal form (`0o`/`0O`), handled like the hexadecimal form with radix 8.
#[regex(r"0[oO][0-7]+", |lex| {
let s = lex.slice();
u64::from_str_radix(&s[2..], 8).ok().map(|n| n as f64)
})]
Number(f64),
// ---- String literals ----
// A double-quoted literal in which a backslash escapes the following
// character. The callback strips the surrounding quotes and decodes the
// escape sequences into the payload string.
#[regex(r#""[^"\\]*(?:\\.[^"\\]*)*""#, |lex| {
let s = lex.slice();
// Drop the opening and closing quote.
let inner = &s[1..s.len()-1];
let mut out = String::with_capacity(inner.len());
let mut chars = inner.chars();
while let Some(c) = chars.next() {
if c == '\\' {
// Decode the character following the backslash.
match chars.next() {
Some('n') => out.push('\n'),
Some('t') => out.push('\t'),
Some('r') => out.push('\r'),
Some('"') => out.push('"'),
Some('\\') => out.push('\\'),
Some('f') => out.push('\u{000C}'),
Some('b') => out.push('\u{0008}'),
Some('v') => out.push('\u{000B}'),
Some('a') => out.push('\u{0007}'),
Some('0') => out.push('\u{0000}'),
Some('/') => out.push('/'),
Some('x') => {
// \xNN — two hex digits encode a Unicode scalar in
// U+0000..=U+00FF. Non-hex digits or a truncated
// sequence are passed through literally (same lenient
// policy as unknown escapes below), keeping the lexer
// infallible so the parser surfaces a clean diagnostic
// rather than a lexer crash.
let hi = chars.next();
let lo = chars.next();
match (hi, lo) {
(Some(h), Some(l))
if h.is_ascii_hexdigit() && l.is_ascii_hexdigit() =>
{
let val = u8::from_str_radix(
&format!("{h}{l}"),
16,
)
.expect("two hex digits always parse as u8");
out.push(char::from(val));
}
(Some(h), Some(l)) => {
// Non-hex: pass through literally.
out.push('\\');
out.push('x');
out.push(h);
out.push(l);
}
// Only one character followed `\x`: keep what was read.
(Some(h), None) => {
out.push('\\');
out.push('x');
out.push(h);
}
// Nothing followed `\x`: keep the two characters as written.
(None, _) => {
out.push('\\');
out.push('x');
}
}
}
// Unknown escape: keep the backslash and the character unchanged.
Some(other) => { out.push('\\'); out.push(other); }
// A backslash with nothing after it contributes nothing.
None => {}
}
} else {
out.push(c);
}
}
Some(out)
})]
Text(String),
// ---- Identifiers ----
// A lowercase letter followed by lowercase letters/digits, optionally
// continued by `-`-separated segments of lowercase letters/digits. Declared
// with `priority = 1`.
#[regex(r"[a-z][a-z0-9]*(-[a-z0-9]+)*", |lex| lex.slice().to_string(), priority = 1)]
Ident(String),
// A line feed is a token of its own rather than skipped whitespace.
#[token("\n")]
Newline,
}
impl Token {
    /// Returns the name of this token as it should appear in diagnostics.
    ///
    /// Tokens with a fixed spelling are rendered as that spelling wrapped in
    /// backticks. Numbers, text and identifiers are prefixed with their kind,
    /// and a newline is rendered as the bare word `newline`.
    pub fn user_facing_name(&self) -> String {
        // Payload-carrying tokens and the newline return early; every other
        // arm only selects the spelling that is wrapped in backticks below.
        let spelling: &str = match self {
            Token::Number(n) => {
                // Whole numbers of moderate magnitude print without a
                // fractional part.
                let prints_as_integer = n.fract() == 0.0 && n.is_finite() && n.abs() < 1e16;
                return if prints_as_integer {
                    format!("number `{}`", *n as i64)
                } else {
                    format!("number `{n}`")
                };
            }
            Token::Text(s) => return format!("text `\"{s}\"`"),
            Token::Ident(name) => return format!("identifier `{name}`"),
            Token::Newline => return "newline".into(),

            Token::By => "by",
            Token::Type => "type",
            Token::Tool => "tool",
            Token::Use => "use",
            Token::With => "with",
            Token::Timeout => "timeout",
            Token::Retry => "retry",

            Token::ListType => "L",
            Token::ResultType => "R",
            Token::FnType => "F",
            Token::OptType => "O",
            Token::MapType => "M",
            Token::SumType => "S",
            Token::WorldType => "W",
            Token::U32Type => "U32",
            Token::U64Type => "U64",
            Token::I64Type => "I64",

            Token::KwIf => "if",
            Token::KwReturn => "return",
            Token::KwLet => "let",
            Token::KwFn => "fn",
            Token::KwDef => "def",
            Token::KwVar => "var",
            Token::KwConst => "const",
            Token::True => "true",
            Token::False => "false",
            Token::Nil => "nil",

            Token::GreaterEq => ">=",
            Token::LessEq => "<=",
            Token::NotEq => "!=",
            Token::PlusEq => "+=",
            Token::PipeOp => ">>",
            Token::NilCoalesce => "??",
            Token::BangBang => "!!",
            Token::Plus => "+",
            Token::Minus => "-",
            Token::Star => "*",
            Token::Slash => "/",
            Token::Greater => ">",
            Token::Less => "<",
            Token::Eq => "=",
            Token::Amp => "&",
            Token::Pipe => "|",
            Token::Question => "?",
            Token::At => "@",
            Token::Bang => "!",
            Token::Caret => "^",
            Token::Tilde => "~",
            Token::Dollar => "$",

            Token::Colon => ":",
            Token::Semi => ";",
            Token::DotDot => "..",
            Token::DotQuestion => ".?",
            Token::Dot => ".",
            Token::Comma => ",",
            Token::LBrace => "{",
            Token::RBrace => "}",
            Token::LParen => "(",
            Token::RParen => ")",
            Token::LBracket => "[",
            Token::RBracket => "]",
            Token::Underscore => "_",
        };
        format!("`{spelling}`")
    }
}
/// Normalizes `source` exactly like [`normalize_newlines_with_map`] and returns
/// only the rewritten text, discarding the offset map.
pub fn normalize_newlines(source: &str) -> String {
    let (normalized, _offsets) = normalize_newlines_with_map(source);
    normalized
}
/// Rewrites `source` into the form the lexer consumes and returns it together
/// with a byte-offset map back into `source`.
///
/// The map holds one entry per byte of the returned string (the source byte
/// offset that output byte is attributed to) followed by one trailing entry
/// equal to `source.len()`.
///
/// Per character, the following cases are checked in order:
/// - a `\r` immediately followed by `\n` is dropped;
/// - a `"""…"""` block is dedented via `strip_triple_indent` and re-emitted as
///   an ordinary `"…"` literal with newlines, carriage returns and quotes
///   escaped;
/// - an ordinary `"…"` literal is copied through unchanged;
/// - a `--` comment is removed up to (not including) the next newline;
/// - a newline inside `(`/`[` nesting becomes a single space;
/// - any other newline swallows the blank lines after it, then becomes `;`
///   (or nothing) when the next line is indented, nothing when the next line
///   starts with `}`, and stays `\n` otherwise;
/// - every other character is copied as is.
pub fn normalize_newlines_with_map(source: &str) -> (String, Vec<u32>) {
// Fast path: with no newline and no triple quote nothing is rewritten, so the
// text is returned unchanged with an identity map.
if !source.contains('\n') && !source.contains("\"\"\"") {
let len = source.len();
let mut map = Vec::with_capacity(len + 1);
for i in 0..=len {
// NOTE(review): offsets are stored as u32; a source longer than u32::MAX
// bytes would be truncated by this cast.
map.push(i as u32);
}
return (source.to_string(), map);
}
let mut out = String::with_capacity(source.len());
let mut map: Vec<u32> = Vec::with_capacity(source.len() + 1);
// Last non-whitespace character copied to `out`; string literals record `"`.
let mut last_significant: Option<char> = None;
// Nesting depth of `(` and `[` (braces are not counted).
let mut bracket_depth: u32 = 0;
let mut iter = source.char_indices().peekable();
// Appends `s` to `out`, attributing every byte of it to source offset `orig`.
fn push_str_with(out: &mut String, map: &mut Vec<u32>, s: &str, orig: usize) {
for _ in 0..s.len() {
map.push(orig as u32);
}
out.push_str(s);
}
// Appends `c` to `out`, attributing each of its UTF-8 bytes to `orig`.
fn push_char_with(out: &mut String, map: &mut Vec<u32>, c: char, orig: usize) {
let mut buf = [0u8; 4];
let s = c.encode_utf8(&mut buf);
push_str_with(out, map, s, orig);
}
while let Some((i, c)) = iter.next() {
// CRLF: drop the `\r`; the `\n` is handled on the next iteration.
if c == '\r' && iter.peek().map(|(_, ch)| *ch) == Some('\n') {
continue;
}
if c == '"' {
let src_bytes = source.as_bytes();
// Triple-quoted block: `"""` opens here.
if src_bytes.get(i + 1) == Some(&b'"') && src_bytes.get(i + 2) == Some(&b'"') {
// Consume the second and third opening quote.
iter.next();
iter.next();
let content_start = i + 3;
// Scan forward for the closing `"""`.
let mut j = content_start;
while j + 2 < src_bytes.len()
&& !(src_bytes[j] == b'"'
&& src_bytes[j + 1] == b'"'
&& src_bytes[j + 2] == b'"')
{
j += 1;
}
// If no closing delimiter was found, the content runs to the end of input.
let (content_end, close_end) = if j + 2 < src_bytes.len() {
(j, j + 3)
} else {
(src_bytes.len(), src_bytes.len())
};
// Advance the main iterator past the whole block, delimiter included.
while let Some(&(pi, _)) = iter.peek() {
if pi >= close_end {
break;
}
iter.next();
}
let raw = &source[content_start..content_end];
let stripped = strip_triple_indent(raw);
// Emit the block as an ordinary single-line string literal.
push_char_with(&mut out, &mut map, '"', i);
let stripped_chars: Vec<(usize, char)> = stripped.bytes.char_indices().collect();
let mut k = 0usize;
while k < stripped_chars.len() {
let (b_off, ch) = stripped_chars[k];
// `src_offsets` is relative to `raw`; shift it back into `source`.
let orig = stripped.src_offsets[b_off] + content_start;
match ch {
'\n' => {
push_str_with(&mut out, &mut map, "\\n", orig);
}
'\r' => {
push_str_with(&mut out, &mut map, "\\r", orig);
}
'"' => {
push_str_with(&mut out, &mut map, "\\\"", orig);
}
'\\' => {
// Keep an existing escape pair together so the escaped character is
// not escaped a second time; a raw newline or carriage return after
// the backslash becomes the letter `n` or `r`.
push_char_with(&mut out, &mut map, '\\', orig);
if k + 1 < stripped_chars.len() {
let (nb_off, nch) = stripped_chars[k + 1];
let norig = stripped.src_offsets[nb_off] + content_start;
match nch {
'\n' => push_char_with(&mut out, &mut map, 'n', norig),
'\r' => push_char_with(&mut out, &mut map, 'r', norig),
_ => push_char_with(&mut out, &mut map, nch, norig),
}
k += 2;
continue;
}
}
_ => {
push_char_with(&mut out, &mut map, ch, orig);
}
}
k += 1;
}
// The closing quote maps to the start of the closing delimiter, or to the
// last source byte when the block was never closed.
let close_pos = if close_end > content_end {
content_end
} else {
source.len().saturating_sub(1)
};
push_char_with(&mut out, &mut map, '"', close_pos);
last_significant = Some('"');
continue;
}
// Ordinary string literal: copy it verbatim up to and including the closing
// quote, keeping each backslash together with the character it escapes.
push_char_with(&mut out, &mut map, c, i);
last_significant = Some(c);
while let Some((si, sc)) = iter.next() {
push_char_with(&mut out, &mut map, sc, si);
if sc == '\\' {
if let Some((ei, esc)) = iter.next() {
push_char_with(&mut out, &mut map, esc, ei);
}
} else if sc == '"' {
last_significant = Some(sc);
break;
}
}
} else if c == '-' && iter.peek().map(|(_, ch)| *ch) == Some('-') {
// `--` comment: discard everything up to, but not including, the newline.
iter.next(); while let Some(&(_, nc)) = iter.peek() {
if nc == '\n' {
break;
}
iter.next();
}
} else if c == '\n' {
// Inside `(` or `[`: the newline and the indentation after it collapse to
// one space.
if bracket_depth > 0 {
push_char_with(&mut out, &mut map, ' ', i);
while matches!(iter.peek().map(|(_, ch)| *ch), Some(' ') | Some('\t')) {
iter.next();
}
continue;
}
// Swallow following lines that contain only spaces/tabs.
loop {
let mut probe = iter.clone();
while matches!(probe.peek().map(|(_, ch)| *ch), Some(' ') | Some('\t')) {
probe.next();
}
if probe.peek().map(|(_, ch)| *ch) == Some('\n') {
while matches!(iter.peek().map(|(_, ch)| *ch), Some(' ') | Some('\t')) {
iter.next();
}
iter.next(); } else {
break;
}
}
if matches!(iter.peek().map(|(_, ch)| *ch), Some(' ') | Some('\t')) {
// The next line is indented. Look past the indentation to see whether it
// starts with `>>`.
let mut lookahead = iter.clone();
while matches!(lookahead.peek().map(|(_, ch)| *ch), Some(' ') | Some('\t')) {
lookahead.next();
}
let next_is_pipe = {
let mut probe = lookahead.clone();
probe.next().map(|(_, c)| c) == Some('>')
&& probe.next().map(|(_, c)| c) == Some('>')
};
// Join with `;` unless a block was just opened, a `;` is already present,
// or the next line starts with `>>`.
if last_significant == Some('{') || out.ends_with(';') || next_is_pipe {
} else {
push_char_with(&mut out, &mut map, ';', i);
}
// Drop the indentation itself.
while matches!(iter.peek().map(|(_, ch)| *ch), Some(' ') | Some('\t')) {
iter.next();
}
// No `;` directly before a closing brace: remove a trailing one.
if iter.peek().map(|(_, ch)| *ch) == Some('}')
&& last_significant != Some('{')
&& out.ends_with(';')
{
out.pop(); map.pop();
}
} else if iter.peek().map(|(_, ch)| *ch) == Some('}') {
// Next line starts with `}` at column 0: the newline is dropped.
} else {
push_char_with(&mut out, &mut map, '\n', i);
}
} else {
// Any other character is copied through; track significance and nesting.
push_char_with(&mut out, &mut map, c, i);
if !c.is_ascii_whitespace() {
last_significant = Some(c);
}
match c {
'(' | '[' => bracket_depth += 1,
')' | ']' => {
bracket_depth = bracket_depth.saturating_sub(1);
}
_ => {}
}
}
}
// Trailing sentinel so that an end-of-text offset can also be mapped.
map.push(source.len() as u32);
debug_assert_eq!(map.len(), out.len() + 1);
(out, map)
}
/// Result of `strip_triple_indent`: the dedented text plus, for each of its
/// bytes, the offset it came from in the raw input.
struct StrippedTriple {
// The text after indentation stripping (a `String`, despite the field name).
bytes: String,
// One entry per byte of `bytes`: the byte offset within the raw input that
// was passed to `strip_triple_indent`.
src_offsets: Vec<usize>,
}
/// Removes common indentation from the raw content of a triple-quoted block.
///
/// `raw` is the text between the opening and closing delimiters. The returned
/// `src_offsets` has one entry per byte of the returned text, giving that
/// byte's offset within `raw`.
///
/// Behaviour:
/// - content without any `\n` is returned unchanged;
/// - one leading `\n` (or `\r\n`) is skipped;
/// - dedenting is only active when everything after the last newline is
///   spaces/tabs; otherwise the rest of the content is returned as is;
/// - when active, the text after the last newline is excluded from the result,
///   and the longest leading-whitespace prefix shared by all non-blank lines
///   (further limited by that trailing whitespace when it is non-empty) is
///   removed from each line that starts with it; whitespace-only lines that
///   do not start with it are emptied.
fn strip_triple_indent(raw: &str) -> StrippedTriple {
let bytes = raw.as_bytes();
// Single-line content: nothing to strip, identity offsets.
if !bytes.contains(&b'\n') {
let src_offsets: Vec<usize> = (0..bytes.len()).collect();
return StrippedTriple {
bytes: raw.to_string(),
src_offsets,
};
}
// Skip the line break that immediately follows the opening delimiter.
let mut start = 0usize;
if bytes.first() == Some(&b'\n') {
start = 1;
} else if bytes.len() >= 2 && bytes[0] == b'\r' && bytes[1] == b'\n' {
start = 2;
}
// Locate the last newline at or after `start`. Dedenting applies only when
// the text after it (the closing delimiter's line) is pure spaces/tabs; the
// content then ends just after that newline.
let (end, dedent_active) = {
let mut last_nl: Option<usize> = None;
for (k, &b) in bytes[start..].iter().enumerate() {
if b == b'\n' {
last_nl = Some(start + k);
}
}
match last_nl {
Some(nl) => {
let after_nl = &bytes[nl + 1..];
if after_nl.iter().all(|&b| b == b' ' || b == b'\t') {
(nl + 1, true)
} else {
(bytes.len(), false)
}
}
None => (bytes.len(), false),
}
};
let inner = &raw[start..end];
if !dedent_active {
// No dedent: return the content after the leading line break verbatim.
let src_offsets: Vec<usize> = (start..end).collect();
return StrippedTriple {
bytes: inner.to_string(),
src_offsets,
};
}
let lines: Vec<&str> = inner.split('\n').collect();
// Whitespace that precedes the closing delimiter on its own line.
let closing_indent = &raw[end..];
// Longest leading run of spaces/tabs shared by every non-blank line.
let mut common: Option<&str> = None;
for line in lines.iter() {
// Whitespace-only lines do not constrain the common indentation.
if line.chars().all(|c| c == ' ' || c == '\t') {
continue;
}
let lead: &str = {
let mut byte_end = 0;
for (off, ch) in line.char_indices() {
if ch == ' ' || ch == '\t' {
byte_end = off + ch.len_utf8();
} else {
break;
}
}
&line[..byte_end]
};
common = Some(match common {
None => lead,
Some(prev) => common_prefix(prev, lead),
});
}
// A non-empty closing indentation also limits how much is stripped; with no
// non-blank lines at all, the closing indentation alone decides.
let common: &str = match (common, closing_indent.is_empty()) {
(Some(c), false) => common_prefix(c, closing_indent),
(Some(c), true) => c,
(None, _) => closing_indent,
};
let strip_len = common.len();
let mut out_bytes = String::with_capacity(inner.len());
let mut src_offsets: Vec<usize> = Vec::with_capacity(inner.len());
// Byte offset of the current line's start, relative to `inner`.
let mut line_offset_in_inner = 0usize;
for (li, line) in lines.iter().enumerate() {
let line_bytes = line.as_bytes();
// How many leading bytes to drop from this line.
let drop = if line_bytes.starts_with(common.as_bytes()) {
strip_len
} else if line.chars().all(|c| c == ' ' || c == '\t') {
line_bytes.len()
} else {
0
};
let line_abs_start = start + line_offset_in_inner;
for (off, ch) in line[drop..].char_indices() {
let abs = line_abs_start + drop + off;
out_bytes.push(ch);
// Every byte of a multi-byte character maps to that character's start.
for _ in 0..ch.len_utf8() {
src_offsets.push(abs);
}
}
// Restore the newline that `split` removed (none after the final piece).
if li + 1 < lines.len() {
out_bytes.push('\n');
src_offsets.push(line_abs_start + line.len());
}
line_offset_in_inner += line.len() + 1; }
debug_assert_eq!(out_bytes.len(), src_offsets.len());
StrippedTriple {
bytes: out_bytes,
src_offsets,
}
}
/// Returns the longest prefix of `a` that is also a prefix of `b`.
///
/// The comparison is done character by character, so the returned slice
/// always ends on a UTF-8 character boundary. Comparing raw bytes and slicing
/// at the first mismatch would panic when two strings diverge in the middle
/// of a multi-byte character (for example `"é"` and `"è"`, which share their
/// first byte).
fn common_prefix<'a>(a: &'a str, b: &str) -> &'a str {
    let shared_len = a
        .char_indices()
        .zip(b.chars())
        .find(|((_, ca), cb)| ca != cb)
        .map(|((byte_idx, _), _)| byte_idx)
        // No mismatch: the shorter string is a prefix of the longer one.
        .unwrap_or_else(|| a.len().min(b.len()));
    &a[..shared_len]
}
/// Lexes `source` into tokens paired with byte ranges into `source`.
///
/// The input is first rewritten by `normalize_newlines_with_map`; the
/// rewritten text is lexed by `lex_normalized`, and every resulting span (or
/// the error position on failure) is translated back to offsets in the
/// original `source` through the returned offset map.
pub fn lex(source: &str) -> Result<Vec<(Token, std::ops::Range<usize>)>, LexError> {
    let (normalized, span_map) = normalize_newlines_with_map(source);

    // Offsets beyond the end of the map fall back to the end of the source.
    let to_source =
        |off: usize| -> usize { span_map.get(off).map_or(source.len(), |&orig| orig as usize) };

    lex_normalized(&normalized)
        .map(|tokens| {
            tokens
                .into_iter()
                .map(|(token, span)| (token, to_source(span.start)..to_source(span.end)))
                .collect()
        })
        .map_err(|mut err| {
            err.position = to_source(err.position);
            err
        })
}
/// Lexes already-normalized text into tokens with byte spans into `normalized`.
///
/// Work happens in stages:
/// 1. The main loop drives the `logos` lexer. Uppercase letters get special
///    treatment: they can be absorbed into an identifier that follows a dot,
///    reported as a misplaced capital inside an identifier, or reported as an
///    `AND`/`OR`/`NOT` keyword from another language.
/// 2. A number lexed flush after a dot and containing a `.` is split into
///    `Number`, `Dot`, `Number`.
/// 3. A number whose text starts with `-` is split into `Minus` + `Number`
///    when it is the first token or follows `;`, a newline, `=`, `{`, `(` or
///    `-`.
/// 4. A non-identifier, non-number token flush after a dot whose text looks
///    like an ASCII identifier is retagged as `Ident`.
/// 5. After a dot, chains of `ident _ ident` / `ident _ integer` written flush
///    are merged into a single `Ident`.
/// 6. Any remaining flush `ident _ ident` sequence is rejected with `ILO-L002`.
///
/// Returns the first error encountered; error positions are offsets into
/// `normalized`.
fn lex_normalized(normalized: &str) -> Result<Vec<(Token, std::ops::Range<usize>)>, LexError> {
let mut lexer = Token::lexer(normalized);
let mut tokens: Vec<(Token, std::ops::Range<usize>)> = Vec::new();
// ---- Stage 1: raw lexing with uppercase handling ----
while let Some(result) = lexer.next() {
match result {
Ok(token) => {
let span = lexer.span();
if is_type_sigil(&token) {
// A sigil letter written directly after an identifier (no gap).
let prev_is_ident_flush = matches!(
tokens.last(),
Some((Token::Ident(_), s)) if s.end == span.start
);
// The previous token is `:`, `>` or another sigil.
let prev_is_type_position = matches!(
tokens.last().map(|(t, _)| t),
Some(Token::Colon)
| Some(Token::Greater)
| Some(Token::ListType)
| Some(Token::ResultType)
| Some(Token::FnType)
| Some(Token::OptType)
| Some(Token::MapType)
| Some(Token::SumType)
);
// Outside those contexts, an uppercase run of two or more letters that
// spells AND/OR/NOT is reported with a pointer to the operator to use.
if !prev_is_ident_flush
&& !prev_token_is_dot_flush(&tokens, span.start)
&& !prev_is_type_position
&& let Some((word, end)) = scan_uppercase_run(normalized, span.start)
&& word.len() >= 2
&& let Some((canonical, hint)) = logical_keyword_message(&word)
{
return Err(LexError {
code: "ILO-L001",
position: span.start,
snippet: normalized[span.start..end].to_string(),
suggestion: format!(
"`{word}` is not an ilo keyword. ilo uses `{canonical}` ({hint})"
),
});
}
// Name and span of an identifier that ends exactly where this sigil starts.
let prev_info = tokens.last().and_then(|(t, s)| match t {
Token::Ident(name) if s.end == span.start => {
Some((name.clone(), s.clone()))
}
_ => None,
});
if let Some((prev_name, prev_span)) = prev_info {
// If that identifier itself follows a dot, extend it over the
// alphanumeric run instead of raising an error.
if prev_ident_is_post_dot(&tokens) {
if let Some(_consumed) = absorb_camel_tail(
normalized,
span.start,
span.end,
&mut lexer,
&mut tokens,
) {
continue;
}
}
// Otherwise a capital inside an identifier is an error.
let sigil_char = normalized[span.clone()].chars().next().unwrap();
return Err(uppercase_mid_ident_error_with_source(
&prev_name,
sigil_char,
&normalized[span.end..],
prev_span.start,
Some(normalized),
));
}
// Sigil directly after a dot: lex the whole alphanumeric run as an
// identifier.
if prev_token_is_dot_flush(&tokens, span.start) {
if let Some(_consumed) = emit_ident_at_dot(
normalized,
span.start,
span.end,
&mut lexer,
&mut tokens,
) {
continue;
}
}
}
tokens.push((token, span));
}
Err(()) => {
let span = lexer.span();
let bad = &normalized[span.clone()];
// A single uppercase ASCII letter that is not a token gets the same
// identifier/dot treatment as the sigil letters above.
if bad.len() == 1 && bad.chars().next().unwrap().is_ascii_uppercase() {
let prev_info = tokens.last().and_then(|(t, s)| match t {
Token::Ident(name) if s.end == span.start => {
Some((name.clone(), s.clone()))
}
_ => None,
});
if let Some((prev_name, prev_span)) = prev_info {
if prev_ident_is_post_dot(&tokens) {
if let Some(_consumed) = absorb_camel_tail(
normalized,
span.start,
span.end,
&mut lexer,
&mut tokens,
) {
continue;
}
}
let c = bad.chars().next().unwrap();
return Err(uppercase_mid_ident_error_with_source(
&prev_name,
c,
&normalized[span.end..],
prev_span.start,
Some(normalized),
));
}
if prev_token_is_dot_flush(&tokens, span.start) {
if let Some(_consumed) = emit_ident_at_dot(
normalized,
span.start,
span.end,
&mut lexer,
&mut tokens,
) {
continue;
}
}
}
// AND/OR/NOT starting with a letter that is not a token.
if bad.len() == 1
&& bad.chars().next().unwrap().is_ascii_uppercase()
&& let Some((word, end)) = scan_uppercase_run(normalized, span.start)
&& let Some((canonical, hint)) = logical_keyword_message(&word)
{
return Err(LexError {
code: "ILO-L001",
position: span.start,
snippet: normalized[span.start..end].to_string(),
suggestion: format!(
"`{word}` is not an ilo keyword. ilo uses `{canonical}` ({hint})"
),
});
}
// A backslash that looks like the start of a lambda from another language
// gets a dedicated hint.
if bad == "\\"
&& let Some(hint) = backslash_lambda_hint(normalized, span.end)
{
return Err(LexError {
code: "ILO-L001",
position: span.start,
snippet: bad.to_string(),
suggestion: hint,
});
}
// Generic fallback classification.
let (code, suggestion) = lex_error_kind(bad);
return Err(LexError {
code,
position: span.start,
snippet: bad.to_string(),
suggestion,
});
}
}
}
// ---- Stage 2: split `a.b` numbers that directly follow a dot ----
{
let mut i = 0;
while i < tokens.len() {
if i == 0 {
i += 1;
continue;
}
let prev_is_dot = matches!(tokens[i - 1].0, Token::Dot | Token::DotQuestion)
&& tokens[i - 1].1.end == tokens[i].1.start;
if !prev_is_dot {
i += 1;
continue;
}
let Token::Number(_) = tokens[i].0 else {
i += 1;
continue;
};
let span = tokens[i].1.clone();
let slice = &normalized[span.clone()];
// Leave numbers with an exponent or a sign untouched.
if slice.contains('e') || slice.contains('E') || slice.starts_with('-') {
i += 1;
continue;
}
let Some(dot_at) = slice.find('.') else {
i += 1;
continue;
};
let head = &slice[..dot_at];
let tail = &slice[dot_at + 1..];
let (Ok(h), Ok(t)) = (head.parse::<f64>(), tail.parse::<f64>()) else {
i += 1;
continue;
};
let head_span = span.start..span.start + dot_at;
let dot_span = span.start + dot_at..span.start + dot_at + 1;
let tail_span = span.start + dot_at + 1..span.end;
tokens.splice(
i..i + 1,
[
(Token::Number(h), head_span),
(Token::Dot, dot_span),
(Token::Number(t), tail_span),
],
);
// Skip over the three tokens just inserted.
i += 3;
}
}
// ---- Stage 3: split a leading `-` off numbers in prefix position ----
{
let mut i = 0;
while i < tokens.len() {
let Token::Number(_) = tokens[i].0 else {
i += 1;
continue;
};
let span = tokens[i].1.clone();
let slice = &normalized[span.clone()];
if !slice.starts_with('-') {
i += 1;
continue;
}
// Only split at the very start or after one of these tokens.
let prev_splits = i == 0
|| matches!(
tokens[i - 1].0,
Token::Semi
| Token::Newline
| Token::Eq
| Token::LBrace
| Token::LParen
| Token::Minus
);
if !prev_splits {
i += 1;
continue;
}
let positive_slice = &slice[1..];
let Ok(n) = positive_slice.parse::<f64>() else {
i += 1;
continue;
};
let minus_span = span.start..span.start + 1;
let number_span = span.start + 1..span.end;
tokens.splice(
i..i + 1,
[(Token::Minus, minus_span), (Token::Number(n), number_span)],
);
i += 2;
}
}
// ---- Stage 4: retag identifier-shaped tokens after a dot as identifiers ----
{
let mut i = 0;
while i < tokens.len() {
if i == 0 {
i += 1;
continue;
}
let prev_is_dot = matches!(tokens[i - 1].0, Token::Dot | Token::DotQuestion)
&& tokens[i - 1].1.end == tokens[i].1.start;
if !prev_is_dot {
i += 1;
continue;
}
if matches!(tokens[i].0, Token::Ident(_) | Token::Number(_)) {
i += 1;
continue;
}
let span = tokens[i].1.clone();
let slice = &normalized[span.clone()];
// Identifier shape: ASCII letter, then ASCII alphanumerics or `_`.
let mut chars = slice.chars();
let first_ok = chars
.next()
.map(|c| c.is_ascii_alphabetic())
.unwrap_or(false);
let rest_ok = chars.all(|c| c.is_ascii_alphanumeric() || c == '_');
if !first_ok || !rest_ok {
i += 1;
continue;
}
tokens[i] = (Token::Ident(slice.to_string()), span);
i += 1;
}
}
// ---- Stage 5: merge underscore-joined names that follow a dot ----
let mut i = 0;
while i + 2 < tokens.len() {
let prev_is_dot = i > 0
&& matches!(tokens[i - 1].0, Token::Dot | Token::DotQuestion)
&& tokens[i - 1].1.end == tokens[i].1.start;
if !prev_is_dot {
i += 1;
continue;
}
if !matches!(tokens[i].0, Token::Ident(_)) {
i += 1;
continue;
}
// Extend `j` over each flush `_` + (identifier | non-negative whole number).
let mut j = i + 1;
let mut has_underscore = false;
while j + 1 < tokens.len()
&& tokens[j].0 == Token::Underscore
&& tokens[j - 1].1.end == tokens[j].1.start
&& tokens[j].1.end == tokens[j + 1].1.start
{
match &tokens[j + 1].0 {
Token::Ident(_) => {
has_underscore = true;
j += 2;
}
Token::Number(n) if n.fract() == 0.0 && *n >= 0.0 => {
has_underscore = true;
j += 2;
// An identifier written flush after the number belongs to the same name.
if j < tokens.len()
&& tokens[j - 1].1.end == tokens[j].1.start
&& matches!(tokens[j].0, Token::Ident(_))
{
j += 1;
}
}
_ => break,
}
}
if !has_underscore {
i += 1;
continue;
}
// Replace tokens `i..j` with one identifier covering their combined text.
let start = tokens[i].1.start;
let end = tokens[j - 1].1.end;
let merged = normalized[start..end].to_string();
let new_tok = (Token::Ident(merged), start..end);
tokens.splice(i..j, std::iter::once(new_tok));
i += 1;
}
// ---- Stage 6: reject remaining `ident_ident` sequences ----
for i in 0..tokens.len().saturating_sub(2) {
let (a, sa) = (&tokens[i].0, &tokens[i].1);
let (b, sb) = (&tokens[i + 1].0, &tokens[i + 1].1);
let (c, sc) = (&tokens[i + 2].0, &tokens[i + 2].1);
if matches!(a, Token::Ident(_))
&& *b == Token::Underscore
&& matches!(c, Token::Ident(_))
&& sa.end == sb.start
&& sb.end == sc.start
{
let Token::Ident(ap) = a else { unreachable!() };
let Token::Ident(cp) = c else { unreachable!() };
let mut combined = format!("{ap}_{cp}");
let mut end = sc.end;
// Include any further flush `_ident` segments in the reported name.
let mut j = i + 3;
while j + 1 < tokens.len()
&& tokens[j].0 == Token::Underscore
&& matches!(tokens[j + 1].0, Token::Ident(_))
&& tokens[j - 1].1.end == tokens[j].1.start
&& tokens[j].1.end == tokens[j + 1].1.start
{
if let Token::Ident(s) = &tokens[j + 1].0 {
combined.push('_');
combined.push_str(s);
}
end = tokens[j + 1].1.end;
j += 2;
}
return Err(LexError {
code: "ILO-L002",
position: sa.start,
snippet: normalized[sa.start..end].to_string(),
suggestion: format!(
"underscores are not allowed in identifiers; use hyphens (e.g. `{}`)",
combined.replace('_', "-")
),
});
}
}
Ok(tokens)
}
/// Reports whether the token list ends with `.`/`.?` immediately followed (no
/// gap between the spans) by an identifier.
fn prev_ident_is_post_dot(tokens: &[(Token, std::ops::Range<usize>)]) -> bool {
    match tokens {
        [.., (Token::Dot | Token::DotQuestion, dot_span), (Token::Ident(_), ident_span)] => {
            dot_span.end == ident_span.start
        }
        _ => false,
    }
}
/// Extends the identifier at the end of `tokens` over the ASCII-alphanumeric
/// run that starts at `span_start`.
///
/// `span_start..span_end` is the span the lexer has just produced. When the
/// run reaches beyond `span_end`, the lexer is bumped past the extra bytes so
/// they are not lexed again.
///
/// Returns the end offset of the merged identifier, or `None` (leaving
/// `tokens` unchanged) when the run is empty or the last token is not an
/// identifier.
fn absorb_camel_tail(
    normalized: &str,
    span_start: usize,
    span_end: usize,
    lexer: &mut logos::Lexer<'_, Token>,
    tokens: &mut Vec<(Token, std::ops::Range<usize>)>,
) -> Option<usize> {
    let run_len = normalized
        .as_bytes()
        .get(span_start..)
        .map_or(0, |tail| {
            tail.iter()
                .take_while(|byte| byte.is_ascii_alphanumeric())
                .count()
        });
    if run_len == 0 {
        return None;
    }
    let end = span_start + run_len;

    // Only an identifier can absorb the tail; anything else is put back.
    let ident_span = match tokens.pop()? {
        (Token::Ident(_), span) => span,
        other => {
            tokens.push(other);
            return None;
        }
    };

    let merged_text = normalized[ident_span.start..end].to_string();
    tokens.push((Token::Ident(merged_text), ident_span.start..end));

    if end > span_end {
        lexer.bump(end - span_end);
    }
    Some(end)
}
/// Reports whether the last token is `.` or `.?` and ends exactly at
/// `span_start`, i.e. nothing separates it from the text starting there.
fn prev_token_is_dot_flush(tokens: &[(Token, std::ops::Range<usize>)], span_start: usize) -> bool {
    match tokens.last() {
        Some((Token::Dot | Token::DotQuestion, dot_span)) => dot_span.end == span_start,
        _ => false,
    }
}
/// Pushes a new identifier token covering the ASCII-alphanumeric run that
/// starts at `span_start`.
///
/// `span_start..span_end` is the span the lexer has just produced. When the
/// run reaches beyond `span_end`, the lexer is bumped past the extra bytes.
///
/// Returns the end offset of the new identifier, or `None` (pushing nothing)
/// when the run is empty.
fn emit_ident_at_dot(
    normalized: &str,
    span_start: usize,
    span_end: usize,
    lexer: &mut logos::Lexer<'_, Token>,
    tokens: &mut Vec<(Token, std::ops::Range<usize>)>,
) -> Option<usize> {
    let run_len = normalized
        .as_bytes()
        .get(span_start..)
        .map_or(0, |tail| {
            tail.iter()
                .take_while(|byte| byte.is_ascii_alphanumeric())
                .count()
        });
    if run_len == 0 {
        return None;
    }
    let end = span_start + run_len;

    let ident_text = normalized[span_start..end].to_string();
    tokens.push((Token::Ident(ident_text), span_start..end));

    if end > span_end {
        lexer.bump(end - span_end);
    }
    Some(end)
}
/// Reports whether `t` is one of the single-letter type sigils
/// `L`, `R`, `F`, `O`, `M` or `S`.
fn is_type_sigil(t: &Token) -> bool {
    let sigils = [
        Token::ListType,
        Token::ResultType,
        Token::FnType,
        Token::OptType,
        Token::MapType,
        Token::SumType,
    ];
    sigils.contains(t)
}
/// Builds the `ILO-L003` error for an identifier that contains a capital.
///
/// * `prev` – the lowercase part lexed before the capital.
/// * `cap` – the offending capital letter.
/// * `rest_after_cap` – source text following the capital; its leading run of
///   ASCII alphanumerics and hyphens is treated as the rest of the name.
/// * `start` – offset reported as the error position.
/// * `source` – when given, it is scanned for other offending identifiers, and
///   up to five distinct ones are appended to the suggestion.
fn uppercase_mid_ident_error_with_source(
    prev: &str,
    cap: char,
    rest_after_cap: &str,
    start: usize,
    source: Option<&str>,
) -> LexError {
    // Reassemble the identifier as the user wrote it.
    let mut full = String::with_capacity(prev.len() + cap.len_utf8());
    full.push_str(prev);
    full.push(cap);
    full.extend(
        rest_after_cap
            .chars()
            .take_while(|c| c.is_ascii_alphanumeric() || *c == '-'),
    );

    let offset = prev.len();
    let lower = full.to_lowercase();
    let hyphenated = hyphenate_camel(&full);
    let mut suggestion = format!(
        "identifiers must be lowercase ASCII; got '{full}' (capital '{cap}' at offset {offset}). Use lowercase, e.g. `{hyphenated}` or `{lower}`"
    );

    // Other distinct offenders in the file, in order of first appearance.
    let mut extras: Vec<String> = Vec::new();
    if let Some(src) = source {
        for offender in scan_camel_offenders(src) {
            let is_this_one = offender.start == start || offender.full == full;
            if is_this_one || extras.contains(&offender.full) {
                continue;
            }
            extras.push(offender.full);
        }
    }

    if !extras.is_empty() {
        let shown = extras.len().min(5);
        let more = match extras.len() - shown {
            0 => String::new(),
            hidden => format!(" (+{} more)", hidden),
        };
        suggestion.push_str(&format!(
            ". Also found in this file: {}{}",
            extras[..shown].join(", "),
            more
        ));
    }

    LexError {
        code: "ILO-L003",
        position: start,
        snippet: full,
        suggestion,
    }
}
/// Converts a camelCase name to lowercase with hyphens.
///
/// Every ASCII uppercase letter after the first character is lowercased and
/// preceded by `-`, unless the output already ends with a hyphen.
fn hyphenate_camel(full: &str) -> String {
    let mut hyphenated = String::with_capacity(full.len() + 2);
    let mut letters = full.chars();

    // The first character never gets a hyphen in front of it.
    if let Some(first) = letters.next() {
        hyphenated.push(first.to_ascii_lowercase());
    }
    for letter in letters {
        let starts_new_word = letter.is_ascii_uppercase() && !hyphenated.ends_with('-');
        if starts_new_word {
            hyphenated.push('-');
        }
        hyphenated.push(letter.to_ascii_lowercase());
    }
    hyphenated
}
/// An identifier containing an uppercase letter, as found by
/// `scan_camel_offenders`.
#[derive(Debug)]
struct CamelOffender {
// Byte offset in the scanned text at which the identifier starts.
start: usize,
// The identifier text exactly as written.
full: String,
}
/// Scans `src` for identifiers that start with a lowercase letter and contain
/// an ASCII uppercase letter, returning them in source order.
///
/// Text inside double-quoted strings (with backslash escapes) and inside `--`
/// comments is ignored. A word is only considered when the byte before it is
/// not alphanumeric, `_` or `-`, and words directly following `.` or `.?` are
/// never reported.
fn scan_camel_offenders(src: &str) -> Vec<CamelOffender> {
    let bytes = src.as_bytes();
    let len = bytes.len();
    let byte_at = |idx: usize| bytes.get(idx).copied();

    let mut offenders: Vec<CamelOffender> = Vec::new();
    let mut pos = 0;
    while pos < len {
        match bytes[pos] {
            // String literal: skip to just past the closing quote.
            b'"' => {
                pos += 1;
                while pos < len {
                    match bytes[pos] {
                        // Skip the escaped byte together with its backslash.
                        b'\\' => pos += 2,
                        b'"' => {
                            pos += 1;
                            break;
                        }
                        _ => pos += 1,
                    }
                }
            }
            // Line comment: skip to the newline, which is left in place.
            b'-' if byte_at(pos + 1) == Some(b'-') => {
                pos += 2;
                while pos < len && bytes[pos] != b'\n' {
                    pos += 1;
                }
            }
            head if head.is_ascii_lowercase() => {
                let before = pos.checked_sub(1).map_or(0, |p| bytes[p]);
                let before_that = pos.checked_sub(2).map_or(0, |p| bytes[p]);

                // Not the start of a word: we are inside a longer name.
                if before.is_ascii_alphanumeric() || before == b'_' || before == b'-' {
                    pos += 1;
                    continue;
                }
                let follows_dot = before == b'.' || (before == b'?' && before_that == b'.');

                // Consume the word, noting any capital after its first byte.
                let word_start = pos;
                let mut word_end = pos;
                let mut saw_capital = false;
                while let Some(c) = byte_at(word_end) {
                    let is_plain = c.is_ascii_lowercase() || c.is_ascii_digit() || c == b'-';
                    let is_inner_capital = c.is_ascii_uppercase() && word_end > word_start;
                    if !(is_plain || is_inner_capital) {
                        break;
                    }
                    saw_capital |= is_inner_capital;
                    word_end += 1;
                }

                if saw_capital && !follows_dot {
                    // The word consists solely of ASCII bytes, so both ends
                    // fall on character boundaries.
                    offenders.push(CamelOffender {
                        start: word_start,
                        full: src[word_start..word_end].to_string(),
                    });
                }
                pos = word_end.max(pos + 1);
            }
            _ => pos += 1,
        }
    }
    offenders
}
/// Produces a hint when a stray backslash looks like the start of a
/// Haskell/Rust/ML-style lambda (`\x -> …`, `\x{…}`, `\x:t…`).
///
/// `after_backslash` is the byte offset in `source` just past the backslash.
/// One optional space is allowed before the parameter name. The name must
/// start with a lowercase ASCII letter and may contain lowercase letters,
/// digits and interior hyphens. After optional spaces, the name must be
/// followed by `{`, `->` or `:` for the text to count as a lambda.
///
/// A hyphen only belongs to the name when a lowercase letter or digit follows
/// it. Without that rule the `-` of an unspaced arrow (`\x->body`) would be
/// swallowed into the parameter name and the lambda would go unrecognised.
///
/// Returns `None` when the text does not look like a lambda.
fn backslash_lambda_hint(source: &str, after_backslash: usize) -> Option<String> {
    let bytes = source.as_bytes();
    let is_name_byte = |b: u8| b.is_ascii_lowercase() || b.is_ascii_digit();

    let mut i = after_backslash;
    if bytes.get(i) == Some(&b' ') {
        i += 1;
    }
    if !bytes.get(i).is_some_and(|b| b.is_ascii_lowercase()) {
        return None;
    }

    // Parameter name: name bytes, with hyphens only between name bytes.
    let param_start = i;
    while i < bytes.len() {
        let b = bytes[i];
        let interior_hyphen =
            b == b'-' && bytes.get(i + 1).is_some_and(|&next| is_name_byte(next));
        if !(is_name_byte(b) || interior_hyphen) {
            break;
        }
        i += 1;
    }
    let param = &source[param_start..i];

    while i < bytes.len() && bytes[i] == b' ' {
        i += 1;
    }
    let next = *bytes.get(i)?;

    let typed = next == b':';
    let braced = next == b'{';
    let arrow = next == b'-' && bytes.get(i + 1) == Some(&b'>');
    if !(typed || braced || arrow) {
        return None;
    }

    Some(format!(
        "`\\{param}{}` is a Haskell/Rust/ML lambda shorthand. ilo has two canonical lambda forms — paren (with types) and brace (no types): at a HOF call site write `map ({param}:t>r;body) xs` or `map {{{param}> body}} xs`. Example: `map ({param}:n>n;+{param} 1) xs` or `map {{{param}> +{param} 1}} xs`.",
        if typed {
            ":t>body"
        } else if braced {
            "{body}"
        } else {
            " -> body"
        }
    ))
}
/// Returns the run of ASCII uppercase letters beginning at byte offset `start`
/// together with the offset just past it, or `None` when no uppercase letter
/// is found at `start` (including when `start` is at or beyond the end).
fn scan_uppercase_run(source: &str, start: usize) -> Option<(String, usize)> {
    let tail = source.as_bytes().get(start..)?;
    let run_len = tail
        .iter()
        .take_while(|byte| byte.is_ascii_uppercase())
        .count();
    (run_len > 0).then(|| {
        let end = start + run_len;
        (source[start..end].to_string(), end)
    })
}
/// Looks up an uppercase logical keyword from another language (`AND`, `OR`,
/// `NOT`) and returns the operator to use instead plus a short explanation.
/// Any other word yields `None`.
fn logical_keyword_message(word: &str) -> Option<(&'static str, &'static str)> {
    // (foreign keyword, replacement operator, explanation)
    const REPLACEMENTS: [(&str, &str, &str); 3] = [
        ("AND", "&", "single `&` for logical and"),
        ("OR", "|", "single `|` for logical or"),
        ("NOT", "!", "prefix `!` for logical not"),
    ];
    REPLACEMENTS
        .iter()
        .find(|(keyword, _, _)| *keyword == word)
        .map(|&(_, canonical, hint)| (canonical, hint))
}
/// Classifies text the lexer could not tokenize and returns the diagnostic
/// code together with a suggestion.
///
/// * `ILO-L002` – longer than one byte and contains an underscore.
/// * `ILO-L003` – longer than one byte and starts with an uppercase character.
/// * `ILO-L001` – anything else.
fn lex_error_kind(bad_token: &str) -> (&'static str, String) {
    let longer_than_one_byte = bad_token.len() > 1;

    if longer_than_one_byte && bad_token.contains('_') {
        let fixed = bad_token.replace('_', "-");
        return (
            "ILO-L002",
            format!("Use hyphens instead of underscores: '{}'", fixed),
        );
    }

    let starts_uppercase = matches!(bad_token.chars().next(), Some(c) if c.is_uppercase());
    if longer_than_one_byte && starts_uppercase {
        return (
            "ILO-L003",
            format!("Use lowercase: '{}'", bad_token.to_lowercase()),
        );
    }

    (
        "ILO-L001",
        format!("Unexpected character(s): '{bad_token}'"),
    )
}
/// Error produced when the input cannot be tokenized.
///
/// Its `Display` output has the form
/// `Lex error at position {position}: '{snippet}'. {suggestion}`.
#[derive(Debug, thiserror::Error)]
#[error("Lex error at position {position}: '{snippet}'. {suggestion}")]
pub struct LexError {
/// Diagnostic code; this file uses `ILO-L001`, `ILO-L002` and `ILO-L003`.
pub code: &'static str,
/// Byte offset of the problem. `lex` remaps this to an offset in the
/// original, un-normalized source before returning the error.
pub position: usize,
/// The offending source text.
pub snippet: String,
/// Human-readable explanation of how to fix the problem.
pub suggestion: String,
}
#[cfg(test)]
#[allow(clippy::approx_constant)]
mod tests {
use super::*;
#[test]
fn lex_simple_function() {
let source = "tot p:n q:n r:n>n;s=*p q;t=*s r;+s t";
let tokens = lex(source).unwrap();
assert!(!tokens.is_empty());
assert_eq!(tokens[0].0, Token::Ident("tot".to_string()));
}
#[test]
fn lex_operators() {
let source = ">=<=!=><+-*/";
let tokens = lex(source).unwrap();
let types: Vec<_> = tokens.iter().map(|(t, _)| t.clone()).collect();
assert_eq!(
types,
vec![
Token::GreaterEq,
Token::LessEq,
Token::NotEq,
Token::Greater,
Token::Less,
Token::Plus,
Token::Minus,
Token::Star,
Token::Slash,
]
);
}
#[test]
fn lex_special_tokens() {
let source = "?@!^~$";
let tokens = lex(source).unwrap();
let types: Vec<_> = tokens.iter().map(|(t, _)| t.clone()).collect();
assert_eq!(
types,
vec![
Token::Question,
Token::At,
Token::Bang,
Token::Caret,
Token::Tilde,
Token::Dollar
]
);
}
#[test]
fn lex_type_constructors() {
let source = "L R";
let tokens = lex(source).unwrap();
assert_eq!(tokens[0].0, Token::ListType);
assert_eq!(tokens[1].0, Token::ResultType);
}
#[test]
fn lex_keywords_vs_idents() {
let source = "type tool with timeout retry";
let tokens = lex(source).unwrap();
let types: Vec<_> = tokens.iter().map(|(t, _)| t.clone()).collect();
assert_eq!(
types,
vec![
Token::Type,
Token::Tool,
Token::With,
Token::Timeout,
Token::Retry,
]
);
}
#[test]
fn lex_string_literal() {
let source = r#""hello world""#;
let tokens = lex(source).unwrap();
assert_eq!(tokens[0].0, Token::Text("hello world".to_string()));
}
#[test]
fn lex_string_escapes_full_set() {
let cases = [
(r#""\n""#, "\n"),
(r#""\t""#, "\t"),
(r#""\r""#, "\r"),
(r#""\"""#, "\""),
(r#""\\""#, "\\"),
(r#""\f""#, "\u{000C}"),
(r#""\b""#, "\u{0008}"),
(r#""\v""#, "\u{000B}"),
(r#""\a""#, "\u{0007}"),
(r#""\0""#, "\u{0000}"),
(r#""\/""#, "/"),
(r#""page1\fpage2""#, "page1\u{000C}page2"),
];
for (src, expected) in cases {
let tokens = lex(src).unwrap();
assert_eq!(
tokens[0].0,
Token::Text(expected.to_string()),
"escape decode mismatch for {src}"
);
}
}
#[test]
fn lex_string_unknown_escape_passes_through() {
let tokens = lex(r#""\z""#).unwrap();
assert_eq!(tokens[0].0, Token::Text("\\z".to_string()));
}
#[test]
fn lex_comment_ignored() {
let source = "-- this is a comment\ntot";
let tokens = lex(source).unwrap();
assert!(
tokens
.iter()
.any(|(t, _)| *t == Token::Ident("tot".to_string()))
);
}
/// Every character of `:;.,{}()_` lexes to its dedicated punctuation token, in order.
#[test]
fn lex_punctuation() {
    let kinds: Vec<Token> = lex(":;.,{}()_")
        .unwrap()
        .into_iter()
        .map(|(tok, _)| tok)
        .collect();
    let expected = [
        Token::Colon,
        Token::Semi,
        Token::Dot,
        Token::Comma,
        Token::LBrace,
        Token::RBrace,
        Token::LParen,
        Token::RParen,
        Token::Underscore,
    ];
    assert_eq!(kinds, expected);
}
/// Integer, decimal and negative literals lex to `Token::Number` with the parsed value.
#[test]
fn lex_number_literals() {
    let tokens = lex("42 3.14 -7").unwrap();
    for (idx, value) in [42.0, 3.14, -7.0].into_iter().enumerate() {
        assert_eq!(tokens[idx].0, Token::Number(value));
    }
}
/// At the very start of input, `-0` is expected as `Minus` + `Number(0.0)`
/// rather than a single negative literal.
#[test]
fn lex_neg_zero_at_start_splits_into_minus_number() {
    let lexed = lex("-0 v").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected = [
        Token::Minus,
        Token::Number(0.0),
        Token::Ident("v".to_string()),
    ];
    assert_eq!(tokens, expected);
}
/// After a `;`, `-0` is expected as `Minus` + `Number(0.0)`.
#[test]
fn lex_neg_literal_after_semi_splits() {
    let lexed = lex("v=p;-0 v").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    // Only the tail starting at the `;` (index 3) is under test.
    let expected_tail = [
        Token::Semi,
        Token::Minus,
        Token::Number(0.0),
        Token::Ident("v".to_string()),
    ];
    for (offset, expected) in expected_tail.into_iter().enumerate() {
        assert_eq!(tokens[3 + offset], expected);
    }
}
/// After `=`, `-1` is expected as `Minus` + `Number(1.0)`.
#[test]
fn lex_neg_literal_after_eq_splits() {
    let lexed = lex("r1=-1 t2").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected_prefix = [
        Token::Ident("r1".to_string()),
        Token::Eq,
        Token::Minus,
        Token::Number(1.0),
        Token::Ident("t2".to_string()),
    ];
    for (idx, expected) in expected_prefix.into_iter().enumerate() {
        assert_eq!(tokens[idx], expected);
    }
}
/// Directly after `{`, `-0` is expected as `Minus` + `Number(0.0)`.
#[test]
fn lex_neg_literal_after_lbrace_splits() {
    let lexed = lex("{-0 v}").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected_prefix = [
        Token::LBrace,
        Token::Minus,
        Token::Number(0.0),
        Token::Ident("v".to_string()),
        Token::RBrace,
    ];
    for (idx, expected) in expected_prefix.into_iter().enumerate() {
        assert_eq!(tokens[idx], expected);
    }
}
/// Directly after `(`, `-0` is expected as `Minus` + `Number(0.0)`.
#[test]
fn lex_neg_literal_after_lparen_splits() {
    let lexed = lex("(-0 v)").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected_prefix = [
        Token::LParen,
        Token::Minus,
        Token::Number(0.0),
        Token::Ident("v".to_string()),
        Token::RParen,
    ];
    for (idx, expected) in expected_prefix.into_iter().enumerate() {
        assert_eq!(tokens[idx], expected);
    }
}
/// Following identifiers, `-1` is expected to stay a single `Number(-1.0)` token.
#[test]
fn lex_neg_literal_after_ident_stays_literal() {
    let lexed = lex("at xs -1").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected = [
        Token::Ident("at".to_string()),
        Token::Ident("xs".to_string()),
        Token::Number(-1.0),
    ];
    assert_eq!(tokens, expected);
}
/// Inside a bracketed list, a `-2` that follows another number stays a negative literal.
#[test]
fn lex_neg_literal_mid_list_stays_literal() {
    let lexed = lex("[1 -2 3]").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected = [
        Token::LBracket,
        Token::Number(1.0),
        Token::Number(-2.0),
        Token::Number(3.0),
        Token::RBracket,
    ];
    assert_eq!(tokens, expected);
}
/// Directly after `[`, `-2` is expected to stay a single negative literal
/// (unlike the `{` and `(` cases, which split).
#[test]
fn lex_neg_literal_after_lbracket_stays_literal() {
    let lexed = lex("[-2 1 3]").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected = [
        Token::LBracket,
        Token::Number(-2.0),
        Token::Number(1.0),
        Token::Number(3.0),
        Token::RBracket,
    ];
    assert_eq!(tokens, expected);
}
/// At the start of input, `-0.05` is expected as `Minus` + `Number(0.05)`.
#[test]
fn lex_neg_float_at_start_splits() {
    let lexed = lex("-0.05 r").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected_prefix = [
        Token::Minus,
        Token::Number(0.05),
        Token::Ident("r".to_string()),
    ];
    for (idx, expected) in expected_prefix.into_iter().enumerate() {
        assert_eq!(tokens[idx], expected);
    }
}
/// In `+a -3`, the `-3` after the operand `a` is expected to stay one negative literal.
#[test]
fn lex_neg_literal_after_prefix_binop_operand_stays() {
    let lexed = lex("+a -3").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected = [
        Token::Plus,
        Token::Ident("a".to_string()),
        Token::Number(-3.0),
    ];
    assert_eq!(tokens, expected);
}
/// After a standalone `-`, a following `-0` is expected as `Minus` + `Number(0.0)`.
#[test]
fn lex_neg_zero_after_minus_splits() {
    let lexed = lex("- -0 a bo").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected = [
        Token::Minus,
        Token::Minus,
        Token::Number(0.0),
        Token::Ident("a".to_string()),
        Token::Ident("bo".to_string()),
    ];
    assert_eq!(tokens, expected);
}
/// After a standalone `-`, a following `-3` is expected as `Minus` + `Number(3.0)`.
#[test]
fn lex_neg_int_after_minus_splits() {
    let lexed = lex("- -3 5").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected = [
        Token::Minus,
        Token::Minus,
        Token::Number(3.0),
        Token::Number(5.0),
    ];
    assert_eq!(tokens, expected);
}
/// After `+`, `-3` is expected to stay a single `Number(-3.0)` token.
#[test]
fn lex_neg_literal_after_plus_stays() {
    let lexed = lex("+ -3 5").unwrap();
    let tokens: Vec<Token> = lexed.iter().map(|(tok, _)| tok.clone()).collect();
    let expected = [Token::Plus, Token::Number(-3.0), Token::Number(5.0)];
    assert_eq!(tokens, expected);
}
/// `true` and `false` lex to their dedicated boolean tokens.
#[test]
fn lex_booleans() {
    let tokens = lex("true false").unwrap();
    for (idx, expected) in [Token::True, Token::False].into_iter().enumerate() {
        assert_eq!(tokens[idx].0, expected);
    }
}
/// Smoke test: a complete one-line function lexes without error into more than ten tokens.
#[test]
fn lex_idea9_example01() {
    let token_count = lex("tot p:n q:n r:n>n;s=*p q;t=*s r;+s t").unwrap().len();
    assert!(token_count > 10);
}
/// Smoke test: a guard-style function with string literals lexes into more than five tokens.
#[test]
fn lex_idea9_example03() {
    let program = r#"cls sp:n>t;>=sp 1000{"gold"};>=sp 500{"silver"};"bronze""#;
    let token_count = lex(program).unwrap().len();
    assert!(token_count > 5);
}
/// A lone `$` lexes to `Token::Dollar`.
#[test]
fn lex_dollar_token() {
    let lexed = lex("$").unwrap();
    let first = &lexed[0].0;
    assert_eq!(*first, Token::Dollar);
}
/// `=` and `==` both lex to `Token::Eq`, and the token that follows is the same either way.
#[test]
fn lex_double_equals_is_eq() {
    let [single, double] = ["=a b", "==a b"].map(|src| lex(src).unwrap());
    for stream in [&single, &double] {
        assert_eq!(stream[0].0, Token::Eq);
    }
    // The operand after the operator must be unaffected by which spelling was used.
    assert_eq!(single[1].0, double[1].0);
}
/// `e==c n` lexes with `==` as one `Eq` token, not two consecutive `Eq` tokens.
#[test]
fn lex_assign_then_equality_with_double_eq() {
    let kinds: Vec<Token> = lex("e==c n")
        .unwrap()
        .into_iter()
        .map(|(tok, _)| tok)
        .collect();
    let expected = [
        Token::Ident("e".to_string()),
        Token::Eq,
        Token::Ident("c".to_string()),
        Token::Ident("n".to_string()),
    ];
    assert_eq!(kinds, expected);
}
/// In `0..3`, the `..` is a `DotDot` token between two numbers; it is not absorbed
/// into either literal as a decimal point.
#[test]
fn lex_dotdot_token() {
    let kinds: Vec<Token> = lex("0..3")
        .unwrap()
        .into_iter()
        .map(|(tok, _)| tok)
        .collect();
    let expected = [Token::Number(0.0), Token::DotDot, Token::Number(3.0)];
    assert_eq!(kinds, expected);
}
/// A single `.` between identifiers lexes to `Dot`.
#[test]
fn lex_dot_vs_dotdot() {
    let kinds: Vec<Token> = lex("x.y")
        .unwrap()
        .into_iter()
        .map(|(tok, _)| tok)
        .collect();
    let expected = [
        Token::Ident("x".to_string()),
        Token::Dot,
        Token::Ident("y".to_string()),
    ];
    assert_eq!(kinds, expected);
}
/// An identifier containing `_` is classified as `ILO-L002` and the suggestion
/// offers the hyphenated spelling.
#[test]
fn lex_suggest_fix_underscore() {
    let (code, hint) = super::lex_error_kind("my_func");
    assert_eq!(code, "ILO-L002");
    let mentions_fix = hint.contains("my-func");
    assert!(mentions_fix, "got: {}", hint);
}
/// A mixed-case identifier is classified as `ILO-L003` and the suggestion
/// offers the lowercased spelling.
#[test]
fn lex_suggest_fix_uppercase() {
    let (code, hint) = super::lex_error_kind("MyFunc");
    assert_eq!(code, "ILO-L003");
    let mentions_fix = hint.contains("myfunc");
    assert!(mentions_fix, "got: {}", hint);
}
/// An input with no specialised diagnosis (`#`) falls back to `ILO-L001`
/// with the generic "Unexpected character" wording.
#[test]
fn lex_suggest_fix_generic() {
    let (code, hint) = super::lex_error_kind("#");
    assert_eq!(code, "ILO-L001");
    let is_generic = hint.contains("Unexpected character");
    assert!(is_generic, "got: {}", hint);
}
/// Source that is already on a single line passes through normalization untouched.
#[test]
fn normalize_inline_unchanged() {
    let input = "dbl x:n>n;*x 2";
    let output = normalize_newlines(input);
    assert_eq!(output, "dbl x:n>n;*x 2");
}
/// An indented line after a header is joined onto the header with `;`.
#[test]
fn normalize_indented_body() {
    let input = "greet name:t>t\n +\"hello \" name";
    let output = normalize_newlines(input);
    assert_eq!(output, "greet name:t>t;+\"hello \" name");
}
/// Several indented body lines are all folded into one `;`-separated line.
#[test]
fn normalize_multi_statement() {
    let input = "calc a:n b:n>n\n s=+a b\n p=*a b\n +s p";
    let output = normalize_newlines(input);
    assert_eq!(output, "calc a:n b:n>n;s=+a b;p=*a b;+s p");
}
/// Two unindented declarations on consecutive lines keep a newline between them.
#[test]
fn normalize_separate_functions_preserved() {
    let result = normalize_newlines("dbl x:n>n;*x 2\ninc x:n>n;+x 1");
    let keeps_newline = result.contains('\n');
    assert!(
        keeps_newline,
        "newline between functions should be preserved: {result}"
    );
}
/// A multi-line braced type definition collapses to one line, with fields
/// separated by `;` and no separator next to the braces.
#[test]
fn normalize_type_def_braces() {
    let input = "type point{\n x:n\n y:n\n}";
    let output = normalize_newlines(input);
    assert_eq!(output, "type point{x:n;y:n}");
}
/// A braced block nested inside an indented body collapses correctly, and the
/// statement after the closing brace is joined with `;`.
#[test]
fn normalize_nested_braces() {
    let input = "cls sp:n>t\n >=sp 1000{\n \"gold\"\n }\n \"bronze\"";
    let output = normalize_newlines(input);
    assert_eq!(output, "cls sp:n>t;>=sp 1000{\"gold\"};\"bronze\"");
}
/// A bare uppercase `AND` fails to lex with `ILO-L001`; the suggestion names the
/// word and points at `&`, and the snippet is exactly `AND`.
#[test]
fn lex_uppercase_and_emits_friendly_hint() {
    let err = lex("main>b;AND a b").unwrap_err();
    assert_eq!(err.code, "ILO-L001");
    let hint = &err.suggestion;
    assert!(hint.contains("AND"), "suggestion: {}", hint);
    assert!(hint.contains("ilo uses `&`"), "suggestion: {}", hint);
    assert_eq!(err.snippet, "AND");
}
/// A bare uppercase `OR` fails to lex with `ILO-L001`; the suggestion names the
/// word and points at `|`, and the snippet is exactly `OR`.
#[test]
fn lex_uppercase_or_emits_friendly_hint() {
    let err = lex("main>b;OR a b").unwrap_err();
    assert_eq!(err.code, "ILO-L001");
    let hint = &err.suggestion;
    assert!(hint.contains("OR"), "suggestion: {}", hint);
    assert!(hint.contains("ilo uses `|`"), "suggestion: {}", hint);
    assert_eq!(err.snippet, "OR");
}
/// A bare uppercase `NOT` fails to lex with `ILO-L001`; the suggestion names the
/// word and points at `!`.
#[test]
fn lex_uppercase_not_emits_friendly_hint() {
    let err = lex("main>b;NOT a").unwrap_err();
    assert_eq!(err.code, "ILO-L001");
    let hint = &err.suggestion;
    assert!(hint.contains("NOT"), "suggestion: {}", hint);
    assert!(hint.contains("ilo uses `!`"), "suggestion: {}", hint);
}
/// After a `.`, an uppercase `OR` lexes successfully and shows up as an
/// `Ident("OR")` token instead of raising the logical-operator error.
#[test]
fn lex_post_dot_uppercase_keyword_not_treated_as_logical() {
    let tokens = lex("f r:R n t>n;r.OR").unwrap();
    let wanted = Token::Ident("OR".to_string());
    let has_or_ident = tokens.iter().any(|(tok, _)| *tok == wanted);
    assert!(
        has_or_ident,
        "expected post-dot `OR` Ident; tokens: {:?}",
        tokens
    );
}
/// `AND` embedded in a longer identifier (`fooAND`) is reported as `ILO-L003`.
#[test]
fn lex_mid_ident_uppercase_keyword_still_l003() {
    let failure = lex("main>b;fooAND a b").unwrap_err();
    assert_eq!(failure.code, "ILO-L003");
}
/// With `OR` written after a `:`, lexing succeeds and the token stream contains
/// both an `OptType` and a `ResultType` token.
#[test]
fn lex_type_position_or_is_optional_result_not_logical() {
    let tokens = lex("f x:OR n n>n;??x 0").unwrap();
    let contains = |wanted: Token| tokens.iter().any(|(tok, _)| *tok == wanted);
    assert!(
        contains(Token::OptType),
        "expected OptType in tokens: {:?}",
        tokens
    );
    assert!(
        contains(Token::ResultType),
        "expected ResultType in tokens: {:?}",
        tokens
    );
}
/// `\x{...}` is rejected with `ILO-L001` on the backslash, and the suggestion both
/// names the source language and shows the parenthesised lambda form.
#[test]
fn lex_backslash_lambda_brace_emits_hint() {
    let err = lex("f xs:L n>L n;map \\x{+x 1} xs").unwrap_err();
    assert_eq!(err.code, "ILO-L001");
    assert_eq!(err.snippet, "\\");
    let hint = &err.suggestion;
    assert!(
        hint.contains("Haskell/Rust"),
        "suggestion should call out the source language: {}",
        hint
    );
    assert!(
        hint.contains("(x:t>r;body)"),
        "suggestion should point at the canonical parenthesised lambda form: {}",
        hint
    );
}
/// `\x -> ...` is rejected with `ILO-L001` and gets the same lambda hint.
#[test]
fn lex_backslash_lambda_arrow_emits_hint() {
    let err = lex("f xs:L n>L n;map \\x -> +x 1 xs").unwrap_err();
    assert_eq!(err.code, "ILO-L001");
    let hint = &err.suggestion;
    assert!(hint.contains("Haskell/Rust"), "suggestion: {}", hint);
}
/// A space between the backslash and the parameter (`\ x{...}`) still yields the lambda hint.
#[test]
fn lex_backslash_lambda_space_param_emits_hint() {
    let err = lex("f xs:L n>L n;map \\ x{+x 1} xs").unwrap_err();
    assert_eq!(err.code, "ILO-L001");
    let hint = &err.suggestion;
    assert!(hint.contains("(x:t>r;body)"), "suggestion: {}", hint);
}
/// A backslash with nothing after it is an `ILO-L001` error without the lambda hint.
#[test]
fn lex_lone_backslash_no_lambda_hint() {
    let err = lex("f x:n>n;\\").unwrap_err();
    assert_eq!(err.code, "ILO-L001");
    let has_lambda_hint = err.suggestion.contains("Haskell/Rust");
    assert!(
        !has_lambda_hint,
        "lone `\\` should not surface the lambda hint, got: {}",
        err.suggestion
    );
}
/// A backslash followed by an identifier, with no brace or arrow, is an
/// `ILO-L001` error without the lambda hint.
#[test]
fn lex_backslash_followed_by_ident_no_brace_no_hint() {
    let err = lex("f x:n>n;\\foo bar").unwrap_err();
    assert_eq!(err.code, "ILO-L001");
    let has_lambda_hint = err.suggestion.contains("Haskell/Rust");
    assert!(
        !has_lambda_hint,
        "`\\foo bar` (no brace/arrow) should not surface the lambda hint, got: {}",
        err.suggestion
    );
}
/// With `OR` written directly after `>`, lexing succeeds and the token stream
/// contains an `OptType` token.
#[test]
fn lex_type_position_after_greater_or_is_optional_result() {
    let tokens = lex("f>OR n n;~42").unwrap();
    let has_opt = tokens.iter().any(|(tok, _)| *tok == Token::OptType);
    assert!(has_opt, "expected OptType in tokens: {:?}", tokens);
}
/// A blank line between two indented statements does not end the body: everything
/// collapses to one line, leaving only the trailing newline.
#[test]
fn normalize_blank_line_between_indented_statements_is_continuation() {
    let got = normalize_newlines("main>n;\n x=42;\n\n prnt x;\n 0\n");
    // Ignoring trailing newlines, no line break may survive inside the body.
    let body = got.trim_end_matches('\n');
    assert!(
        !body.contains('\n'),
        "blank line should not produce mid-body newline: {:?}",
        got
    );
    assert_eq!(got, "main>n;x=42;prnt x;0\n");
}
/// A run of several blank lines inside a body collapses the same way a single one does.
#[test]
fn normalize_multiple_consecutive_blank_lines_collapse() {
    let input = "main>n;\n x=42;\n\n\n\n prnt x;\n 0\n";
    let output = normalize_newlines(input);
    assert_eq!(output, "main>n;x=42;prnt x;0\n");
}
/// Lines holding only spaces or tabs count as blank and do not break the body.
#[test]
fn normalize_blank_line_with_trailing_whitespace_is_continuation() {
    let input = "main>n;\n x=42;\n \n\t\n prnt x;\n 0\n";
    let output = normalize_newlines(input);
    assert_eq!(output, "main>n;x=42;prnt x;0\n");
}
/// A blank line followed by an unindented declaration keeps one newline as the
/// boundary between the two declarations.
#[test]
fn normalize_blank_line_before_non_indented_keeps_decl_boundary() {
    let got = normalize_newlines("f>n;1\n\ng>n;2\n");
    let keeps_boundary = got.contains('\n');
    assert!(
        keeps_boundary,
        "blank line before top-level decl must keep newline: {:?}",
        got
    );
    assert_eq!(got, "f>n;1\ng>n;2\n");
}
/// A blank line right after the header, before the first body statement, is absorbed.
#[test]
fn normalize_blank_lines_at_start_of_function_body() {
    let input = "main>n;\n\n x=42;\n prnt x;\n 0\n";
    let output = normalize_newlines(input);
    assert_eq!(output, "main>n;x=42;prnt x;0\n");
}
/// Extra blank lines at end of input leave the collapsed body intact; only the
/// prefix is checked, so the exact trailing newlines are not pinned here.
#[test]
fn normalize_trailing_blank_lines_at_eof() {
    let output = normalize_newlines("main>n;\n x=42;\n 0\n\n\n");
    let has_collapsed_body = output.starts_with("main>n;x=42;0");
    assert!(has_collapsed_body);
}
/// A `prnt fmt` statement followed by a blank line and more statements still
/// collapses to one line, and the same source lexes into a non-empty token list.
#[test]
fn normalize_prnt_fmt_then_blank_line_parses() {
    let src = "main>n;\n x=42;\n prnt fmt \"x={}\" x;\n\n y=99;\n prnt y;\n 0\n";
    let got = normalize_newlines(src);
    let body = got.trim_end_matches('\n');
    assert!(
        !body.contains('\n'),
        "prnt fmt + blank line + next stmt must collapse: {:?}",
        got
    );
    let token_count = lex(src).unwrap().len();
    assert!(token_count != 0, "lex must produce tokens");
}
/// The offset map returned next to the normalized text has one entry per
/// normalized byte plus one, never points past the original source, and maps
/// the start of `y=99` back to where `y=99` sits in the original.
#[test]
fn normalize_blank_line_offset_map_preserves_span_fidelity() {
    let src = "main>n;\n x=42;\n\n y=99;\n prnt y;\n 0\n";
    let (normalized, map) = normalize_newlines_with_map(src);

    assert_eq!(map.len(), normalized.len() + 1);

    // Every mapped offset must be a valid position in the original source.
    let limit = src.len() as u32;
    for &offset in &map {
        assert!(
            offset <= limit,
            "map offset {} > src.len() {}",
            offset,
            limit
        );
    }

    // Round-trip one known statement through the map.
    let normalized_at = normalized.find("y=99").expect("y=99 in normalized");
    let original_at = map[normalized_at] as usize;
    let recovered = &src[original_at..original_at + 4];
    assert_eq!(
        recovered, "y=99",
        "span remap landed at wrong original byte"
    );
}
}