zuzu-rust 0.6.0

from std/path/z/lexer import Lexer;
from std/path/z/evaluate import Evaluator;
from std/path/z/operators import Operator;
from test/more import *;

let e := new Evaluator();

function token_kinds ( src ) {
	let scanner := new Lexer(
		src: src,
		allowed_operators: e.operator_definitions(),
	);
	let seen := [];
	while ( true ) {
		let tok_kind := scanner.next_tok(){k};
		seen.push( tok_kind );
		last if tok_kind ≡ "EOF";
	}
	return seen;
}

let basic := new Lexer(
	src: "..*",
	allowed_operators: e.operator_definitions(),
);
is(
	basic.peek_kind(),
	"DOTDOTSTAR",
	"peek_kind on first token",
);
is(
	basic.peek_kind_n(1),
	"EOF",
	"peek_kind_n sees lookahead",
);
is(
	basic.next_tok(){k},
	"DOTDOTSTAR",
	"next_tok returns token",
);
is(
	basic.expect( "EOF" ){k},
	"EOF",
	"expect returns matching token",
);

let quoted := new Lexer(
	src: "@name == \"a\\\\b\\\"c\"",
	allowed_operators: e.operator_definitions(),
);
while ( true ) {
	last if quoted.peek_kind() ≡ "STRING";
	quoted.next_tok();
}
let dq_tok := quoted.next_tok();
is(
	dq_tok{v},
	"a\\b\"c",
	"double-quoted string supports \\\\, \\\"",
);
is(
	dq_tok{q},
	"\"",
	"string token records quote type (double)",
);

let squoted := new Lexer(
	src: "@name == 'a\\\\b\\\'c'",
	allowed_operators: e.operator_definitions(),
);
while ( true ) {
	last if squoted.peek_kind() ≡ "STRING";
	squoted.next_tok();
}
let sq_tok := squoted.next_tok();
is(
	sq_tok{v},
	"a\\b'c",
	"single-quoted string supports \\\\, \\\'",
);
is(
	sq_tok{q},
	"'",
	"string token records quote type (single)",
);

let neg := new Lexer(
	src: "items/#-12",
	allowed_operators: e.operator_definitions(),
);
while ( true ) {
	last if neg.peek_kind() ≡ "INDEX";
	neg.next_tok();
}
let index_tok := neg.next_tok();
is( index_tok{v}, -12, "index token accepts negative numbers" );

let elvis_ops := [
	new Operator( spelling: "?:", kind: "ELVIS", precedence: 1 ),
];
let elvis := new Lexer( src: "value ?: fallback", allowed_operators: elvis_ops );
while ( true ) {
	last if elvis.peek_kind() ≡ "ELVIS";
	elvis.next_tok();
}
let elvis_tok := elvis.next_tok();
is(
	elvis_tok{v},
	"?:",
	"Elvis opt-in operator tokenizes as one token",
);

let elvis_failure := exception( function () {
	new Lexer(
		src: "value ?: fallback",
		allowed_operators: e.operator_definitions(),
	);
} );
like(
	elvis_failure,
	/Ternary operator '\?' requires whitespace/,
	"ZPath operator table does not opt in to Elvis tokenization",
);

let op_lx := new Lexer(
	src: "a && b",
	allowed_operators: e.operator_definitions(),
);
let op_table := op_lx.known_operators();
ok(
	op_table.length() > 0,
	"known_operators returns operator table",
);
let second_entry := op_table[1];
is( second_entry{spelling}, "&&", "operator table includes spelling" );
is( second_entry{kind}, "ANDAND", "operator table includes kind" );

let custom_chars := [
	"i", "s", "S", "a",
	"m", "e", "A", "s",
];
let custom_table := [
	{ spelling: "isSameAs", kind: "SAMEAS", require_ws: true },
];
let custom_op := op_lx._operator_at( custom_chars, 0, custom_chars.length(), custom_table );
is(
	custom_op{kind},
	"SAMEAS",
	"arbitrary-length operator matches via table",
);

let examples := [
	"..*",
	"../tr[index() == 0]/td",
	"body[**/tr[count(td) == 2]]",
	"car[!age || type(age) == \"null\"]",
	"union(**/bowl/*, **/fruit)",
	"table[@class == \"defn\"]",
	"[@* == \"defn\"]",
	"list/*[index() % 2 == 0]",
];

for ( let sample in examples ) {
	let sample_kinds := token_kinds( sample );
	let n_kinds := sample_kinds.length();
	ok(
		n_kinds > 1,
		`tokenized example: ${sample}`,
	);
	is(
		sample_kinds[ n_kinds - 1 ],
		"EOF",
		`example ends with EOF: ${sample}`,
	);
}

done_testing();