const PREC = {
comment: 1,
using_directive: 2,
control: 1,
stable_type_id: 2,
type: 2,
while: 2,
assign: 3,
case: 3,
stable_id: 4,
unit: 4,
ascription: 4,
postfix: 5,
colon_call: 5,
infix: 6,
constructor_app: 7,
prefix: 7,
compound: 7,
call: 8,
field: 8,
macro: 10,
binding: 10,
};
module.exports = grammar({
name: "scala",
extras: $ => [/\s/, $.comment, $.block_comment],
supertypes: $ => [$.expression, $._definition, $._pattern],
externals: $ => [
$._automatic_semicolon,
$._indent,
$._outdent,
$._simple_string_start,
$._simple_string_middle,
$._simple_multiline_string_start,
$._interpolated_string_middle,
$._interpolated_multiline_string_middle,
$._raw_string_start,
$._raw_string_middle,
$._raw_string_multiline_middle,
$._single_line_string_end,
$._multiline_string_end,
"else",
"catch",
"finally",
"extends",
"derives",
"with",
$.error_sentinel,
],
inline: $ => [
$._pattern,
$._semicolon,
$._definition,
$._param_type,
$._identifier,
$.literal,
],
// Doc: https://tree-sitter.github.io/tree-sitter/creating-parsers, search "precedences"
// These names can be used in the prec functions to define precedence relative only to other names in the array, rather than globally.
precedences: $ => [
["mod", "soft_id"],
["end", "soft_id"],
["new", "structural_type"],
],
conflicts: $ => [
[$.tuple_type, $.parameter_types],
[$.binding, $._simple_expression],
[$.binding, $._type_identifier],
[$.while_expression, $._simple_expression],
[$.if_expression],
[$.match_expression],
[$._given_constructor, $._type_identifier],
[$.instance_expression],
// In case of: 'extension' _indent '{' 'case' operator_identifier 'if' operator_identifier • '=>' …
// we treat `operator_identifier` as `simple_expression`
[$._simple_expression, $.lambda_expression],
// 'package' package_identifier '{' operator_identifier • ':' …
[$.self_type, $._simple_expression],
// 'package' package_identifier '{' operator_identifier '=>' • 'enum' …
[$.self_type, $.lambda_expression],
// 'class' _class_constructor • _automatic_semicolon …
[$._class_definition],
// 'class' operator_identifier • _automatic_semicolon …
[$._class_constructor],
// 'enum' _class_constructor '{' 'case' operator_identifier _full_enum_def_repeat1 • _automatic_semicolon …
[$._full_enum_def],
// _start_val identifier ',' identifier • ':' …
[$.identifiers, $.val_declaration],
// 'enum' operator_identifier _automatic_semicolon '(' ')' • ':' …
[$.class_parameters],
// 'for' operator_identifier ':' _annotated_type • ':' …
[$._type, $.compound_type],
// 'given' '(' '[' _type_parameter • ',' …
[$._variant_type_parameter, $.type_lambda],
// 'given' '(' operator_identifier ':' _type • ',' …
[$.name_and_type, $.parameter],
[$._simple_expression, $.binding, $.tuple_pattern],
[$._simple_expression, $.tuple_pattern],
[$._simple_expression, $._type_identifier],
// 'if' parenthesized_expression • '{' …
[$._if_condition, $._simple_expression],
[$.block, $._braced_template_body1],
[$._simple_expression, $.self_type, $._type_identifier],
[$._simple_expression, $._type_identifier],
[$.lambda_expression, $.self_type, $._type_identifier],
[$.lambda_expression, $._type_identifier],
[$.binding, $._simple_expression, $._type_identifier],
],
word: $ => $._alpha_identifier,
rules: {
// TopStats ::= TopStat {semi TopStat}
compilation_unit: $ =>
seq(
optional($._shebang),
optional(trailingSep1($._semicolon, $._top_level_definition)),
),
_top_level_definition: $ =>
choice($._definition, $._end_marker, $.expression),
_definition: $ =>
choice(
$.given_definition,
$.extension_definition,
$.class_definition,
$.import_declaration,
$.export_declaration,
$.object_definition,
$.enum_definition,
$.trait_definition,
$.val_definition,
$.val_declaration,
$.var_definition,
$.var_declaration,
$.type_definition,
$.function_definition,
$.function_declaration,
$.package_clause,
$.package_object,
),
enum_definition: $ =>
seq(
repeat($.annotation),
"enum",
$._class_constructor,
field("extend", optional($.extends_clause)),
field("derive", optional($.derives_clause)),
field("body", $.enum_body),
),
_enum_block: $ =>
prec.left(
seq(
sep1(
$._semicolon,
choice($.enum_case_definitions, $.expression, $._definition),
),
optional($._semicolon),
),
),
enum_body: $ =>
choice(
prec.left(PREC.control, seq(":", $._indent, $._enum_block, $._outdent)),
seq(
"{",
// TODO: self type
optional($._enum_block),
"}",
),
),
enum_case_definitions: $ =>
seq(
repeat($.annotation),
"case",
choice(commaSep1($.simple_enum_case), $.full_enum_case),
),
simple_enum_case: $ =>
prec.left(
seq(
field("name", $._identifier),
field("extend", optional($.extends_clause)),
),
),
full_enum_case: $ => seq(field("name", $._identifier), $._full_enum_def),
_full_enum_def: $ =>
seq(
field("type_parameters", optional($.type_parameters)),
field("class_parameters", repeat1($.class_parameters)),
field("extend", optional($.extends_clause)),
),
package_clause: $ =>
prec.right(
seq(
"package",
field("name", $.package_identifier),
// This is slightly more permissive than the EBNF in that it allows any
// kind of declaration inside of the package blocks. As we're more
// concerned with the structure rather than the validity of the program
// we'll allow it.
field("body", optional($.template_body)),
),
),
package_identifier: $ => prec.right(sep1(".", $._identifier)),
package_object: $ => seq("package", "object", $._object_definition),
import_declaration: $ =>
prec.left(seq("import", sep1(",", $._namespace_expression))),
export_declaration: $ =>
prec.left(seq("export", sep1(",", $._namespace_expression))),
_namespace_expression: $ =>
prec.left(
seq(
field("path", sep1(".", $._identifier)),
optional(
seq(
".",
choice(
$.namespace_wildcard,
$.namespace_selectors,
// Only allowed in Scala 3
// ImportExpr ::=
// SimpleRef {‘.’ id} ‘.’ ImportSpec | SimpleRef ‘as’ id
$.as_renamed_identifier,
),
),
),
),
),
namespace_wildcard: $ => prec.left(1, choice("*", "_", "given")),
_namespace_given_by_type: $ => seq("given", $._type),
namespace_selectors: $ =>
seq(
"{",
trailingCommaSep1(
choice(
$._namespace_given_by_type,
$.namespace_wildcard,
$._identifier,
$.arrow_renamed_identifier,
$.as_renamed_identifier,
),
),
"}",
),
// deprecated: Remove when highlight query is updated for Neovim
_import_selectors: $ => alias($.namespace_selectors, $.import_selectors),
arrow_renamed_identifier: $ =>
seq(
field("name", $._identifier),
"=>",
field("alias", choice($._identifier, $.wildcard)),
),
as_renamed_identifier: $ =>
seq(
field("name", $._identifier),
"as",
field("alias", choice($._identifier, $.wildcard)),
),
object_definition: $ =>
seq(
repeat($.annotation),
optional($.modifiers),
optional("case"),
"object",
$._object_definition,
),
_object_definition: $ =>
prec.left(
seq(
field("name", $._identifier),
field("extend", optional($.extends_clause)),
field("derive", optional($.derives_clause)),
field("body", optional($._definition_body)),
),
),
class_definition: $ =>
seq(
repeat($.annotation),
optional($.modifiers),
optional("case"),
"class",
$._class_definition,
),
_class_definition: $ =>
seq(
$._class_constructor,
field("extend", optional($.extends_clause)),
field("derive", optional($.derives_clause)),
field("body", optional($._definition_body)),
),
_definition_body: $ =>
seq(optional($._automatic_semicolon), field("body", $.template_body)),
/**
* ClassConstr ::= [ClsTypeParamClause] [ConstrMods] ClsParamClauses
* ConstrMods ::= {Annotation} [AccessModifier]
*/
_class_constructor: $ =>
seq(
field("name", $._identifier),
field("type_parameters", optional($.type_parameters)),
optional($.annotation),
optional($.access_modifier),
field(
"class_parameters",
repeat(seq(optional($._automatic_semicolon), $.class_parameters)),
),
),
trait_definition: $ =>
prec.left(
seq(
repeat($.annotation),
optional($.modifiers),
"trait",
$._class_definition,
),
),
// The EBNF makes a distinction between function type parameters and other
// type parameters as you can't specify variance on function type
// parameters. This isn't important to the structure of the AST so we don't
// make that distinction.
type_parameters: $ =>
seq("[", trailingCommaSep1($._variant_type_parameter), "]"),
_variant_type_parameter: $ =>
seq(
repeat($.annotation),
choice(
$.covariant_type_parameter,
$.contravariant_type_parameter,
$._type_parameter, // invariant type parameter
$.type_lambda,
),
),
covariant_type_parameter: $ => seq("+", $._type_parameter),
contravariant_type_parameter: $ => seq("-", $._type_parameter),
_type_parameter: $ =>
seq(
field("name", choice($.wildcard, $._identifier)),
field("type_parameters", optional($.type_parameters)),
field("bound", optional($.lower_bound)),
field("bound", optional($.upper_bound)),
field("bound", optional(repeat($.view_bound))),
field("bound", optional($._context_bounds)),
),
upper_bound: $ => seq("<:", field("type", $._type)),
lower_bound: $ => seq(">:", field("type", $._type)),
view_bound: $ => seq("<%", field("type", $._type)),
_context_bounds: $ =>
choice(
repeat1(seq(":", $.context_bound)),
seq(":", "{", trailingCommaSep1($.context_bound), "}"),
),
context_bound: $ =>
seq(
field("type", $._type),
optional(seq("as", field("name", $._identifier))),
),
/*
* TemplateBody ::= :<<< [SelfType] TemplateStat {semi TemplateStat} >>>
*/
template_body: $ =>
choice($._indented_template_body, $._braced_template_body),
_indented_template_body: $ =>
prec.left(
PREC.control,
seq(":", $._indent, optional($.self_type), $._block, $._outdent),
),
_braced_template_body: $ =>
prec.left(
PREC.control,
seq(
"{",
optional(choice($._braced_template_body1, $._braced_template_body2)),
"}",
),
),
_braced_template_body1: $ => seq(optional($.self_type), $._block),
_braced_template_body2: $ =>
seq(
choice(
seq($._indent, optional($.self_type)),
seq(optional($.self_type), $._indent),
),
optional($._block),
$._outdent,
),
/*
* WithTemplateBody ::= <<< [SelfType] TemplateStat {semi TemplateStat} >>>
*/
with_template_body: $ =>
prec.left(
PREC.control,
seq($._indent, optional($.self_type), $._block, $._outdent),
),
_extension_template_body: $ =>
choice(
prec.left(PREC.control, seq($._indent, $._block, $._outdent)),
seq("{", optional($._block), "}"),
),
_end_marker: $ =>
prec.left(
"end",
seq(
"end",
choice(
"if",
"while",
"for",
"match",
"try",
"new",
"this",
"given",
"extension",
"val",
alias($._identifier, "_end_ident"),
),
),
),
// Dynamic precedences added here to win over $.call_expression
self_type: $ =>
prec.dynamic(
1,
seq($._identifier, optional($._self_type_ascription), "=>"),
),
_self_type_ascription: $ => seq(":", $._type),
annotation: $ =>
prec.right(
seq(
"@",
field("name", $._simple_type),
field("arguments", repeat($.arguments)),
),
),
val_definition: $ =>
seq(
$._start_val,
field("pattern", choice($._pattern, $.identifiers)),
optional(seq(":", field("type", $._type))),
"=",
field("value", $._indentable_expression),
),
val_declaration: $ =>
seq(
$._start_val,
commaSep1(field("name", $._identifier)),
":",
field("type", $._type),
),
_start_val: $ => seq(repeat($.annotation), optional($.modifiers), "val"),
var_declaration: $ =>
seq(
$._start_var,
commaSep1(field("name", $._identifier)),
":",
field("type", $._type),
),
var_definition: $ =>
seq(
$._start_var,
field("pattern", choice($._pattern, $.identifiers)),
optional(seq(":", field("type", $._type))),
"=",
field("value", $._indentable_expression),
),
_start_var: $ => seq(repeat($.annotation), optional($.modifiers), "var"),
type_definition: $ =>
prec.left(
seq(
repeat($.annotation),
optional($.modifiers),
optional($.opaque_modifier),
"type",
$._type_constructor,
optional(seq("=", field("type", $._type))),
),
),
// Created for memory-usage optimization during codegen.
_type_constructor: $ =>
prec.left(
seq(
field("name", $._type_identifier),
field("type_parameters", optional($.type_parameters)),
field("bound", optional($.lower_bound)),
field("bound", optional($.upper_bound)),
field("bound", optional($._context_bounds)),
),
),
function_definition: $ =>
seq(
$._function_declaration,
choice(
seq("=", field("body", $._indentable_expression)),
field("body", $.block),
),
),
function_declaration: $ => $._function_declaration,
_function_declaration: $ =>
prec.left(
seq(
repeat($.annotation),
optional($.modifiers),
"def",
$._function_constructor,
optional(seq(":", field("return_type", $._type))),
),
),
// Created for memory-usage optimization during codegen.
_function_constructor: $ =>
prec.right(
seq(
field("name", $._identifier),
field(
"parameters",
repeat(
seq(
optional($._automatic_semicolon),
choice($.parameters, $.type_parameters),
),
),
),
optional($._automatic_semicolon),
),
),
opaque_modifier: $ => prec("mod", "opaque"),
/**
* Extension ::= 'extension' [DefTypeParamClause] {UsingParamClause}
* '(' DefParam ')' {UsingParamClause} ExtMethods
*/
extension_definition: $ =>
prec.left(
seq(
"extension",
field("type_parameters", optional($.type_parameters)),
field("parameters", repeat($.parameters)),
field(
"body",
choice(
$._extension_template_body,
$.function_definition,
$.function_declaration,
),
),
),
),
/**
* GivenDef ::= [GivenSig] (AnnotType ['=' Expr] | StructuralInstance)
* GivenSig ::= [id] [DefTypeParamClause] {UsingParamClause} ':'
*/
given_definition: $ =>
prec.left(
seq(
repeat($.annotation),
optional($.modifiers),
"given",
optional($._given_constructor),
repeat($._given_sig),
choice(
field("return_type", $._structural_instance),
seq(
field("return_type", $._annotated_type),
optional(seq("=", field("body", $._indentable_expression))),
),
),
),
),
_given_sig: $ => seq($._given_conditional, "=>"),
_given_conditional: $ => alias($.parameters, $.given_conditional),
_given_constructor: $ =>
prec.right(
seq(
field("name", optional($._identifier)),
field("type_parameters", optional($.type_parameters)),
field(
"parameters",
repeat(seq(optional($._automatic_semicolon), $.parameters)),
),
optional($._automatic_semicolon),
":",
),
),
/**
* StructuralInstance ::= ConstrApp {'with' ConstrApp} ['with' WithTemplateBody]
*/
_structural_instance: $ =>
prec.left(
PREC.compound,
seq(
$._constructor_application,
choice(":", "with"),
field("body", $.with_template_body),
),
),
/**
* ConstrApp ::= SimpleType1 {Annotation} {ParArgumentExprs}
*
* Note: It would look more elegant if we could make seq(choice(), optional(arguments)),
* but that doesn't seem to work.
*/
_constructor_application: $ =>
prec.left(
PREC.constructor_app,
choice(
$._annotated_type,
$.compound_type,
// In theory structural_type should just be added to simple_type,
// but doing so increases the state of template_body to 4000
$._structural_type,
// This adds _simple_type, but not the above intentionally.
seq($._simple_type, field("arguments", $.arguments)),
seq($._annotated_type, field("arguments", $.arguments)),
seq($.compound_type, field("arguments", $.arguments)),
),
),
_constructor_applications: $ =>
prec.left(
choice(
commaSep1($._constructor_application),
sep1("with", $._constructor_application),
),
),
modifiers: $ =>
prec.left(
repeat1(
prec.left(
choice(
"abstract",
"final",
"sealed",
"implicit",
"lazy",
"override",
$.access_modifier,
$.inline_modifier,
$.infix_modifier,
$.open_modifier,
$.transparent_modifier,
),
),
),
),
access_modifier: $ =>
prec.left(
seq(choice("private", "protected"), optional($.access_qualifier)),
),
access_qualifier: $ => seq("[", $._identifier, "]"),
inline_modifier: $ => prec("mod", "inline"),
infix_modifier: $ => prec("mod", "infix"),
open_modifier: $ => prec("mod", "open"),
transparent_modifier: $ => prec("mod", "transparent"),
/**
* InheritClauses ::= ['extends' ConstrApps] ['derives' QualId {',' QualId}]
*/
extends_clause: $ =>
prec.left(
seq(
"extends",
field("type", $._constructor_applications),
optional($.arguments),
),
),
derives_clause: $ =>
prec.left(
seq(
"derives",
commaSep1(
field("type", choice($._type_identifier, $.stable_type_identifier)),
),
),
),
class_parameters: $ =>
prec(
1,
seq(
optional($._automatic_semicolon),
"(",
optional(choice("implicit", "using")),
trailingCommaSep($.class_parameter),
")",
),
),
/*
* DefParamClauses ::= {DefParamClause} [[nl] ‘(’ [‘implicit’] DefParams ‘)’]
* DefParamClause ::= [nl] ‘(’ DefParams ‘)’ | UsingParamClause
* DefParams ::= DefParam {‘,’ DefParam}
*/
parameters: $ =>
choice(
seq("(", optional("implicit"), trailingCommaSep($.parameter), ")"),
$._using_parameters_clause,
),
/*
* UsingParamClause ::= [nl] ‘(’ ‘using’ (DefParams | FunArgTypes) ‘)’
* DefParams ::= DefParam {‘,’ DefParam}
* FunArgTypes ::= FunArgType { ‘,’ FunArgType }
*/
_using_parameters_clause: $ =>
seq(
"(",
"using",
choice(
trailingCommaSep1($.parameter),
trailingCommaSep1($._param_type),
),
")",
),
class_parameter: $ =>
seq(
repeat($.annotation),
optional($.modifiers),
optional(choice("val", "var")),
field("name", $._identifier),
optional(seq(":", field("type", $._param_type))),
optional(seq("=", field("default_value", $.expression))),
),
/*
* DefParam ::= {Annotation} [‘inline’] Param
* Param ::= id ‘:’ ParamType [‘=’ Expr]
*/
parameter: $ =>
prec.left(
PREC.control,
seq(
repeat($.annotation),
optional($.inline_modifier),
field("name", $._identifier),
":",
field("type", $._param_type),
optional(seq("=", field("default_value", $.expression))),
),
),
/*
* NameAndType ::= id ':' Type
*/
name_and_type: $ =>
prec.left(
PREC.control,
seq(field("name", $._identifier), ":", field("type", $._param_type)),
),
_block: $ =>
prec.left(
seq(
sep1(
$._semicolon,
choice($.expression, $._definition, $._end_marker, ";"),
),
optional($._semicolon),
),
),
_indentable_expression: $ =>
prec.right(choice($.indented_block, $.indented_cases, $.expression)),
block: $ => seq("{", optional($._block), "}"),
indented_block: $ =>
prec.left(
PREC.control,
seq($._indent, $._block, $._outdent, optional($._end_marker)),
),
indented_cases: $ =>
prec.left(seq($._indent, repeat1($.case_clause), $._outdent)),
_indented_type_cases: $ =>
prec.left(seq($._indent, repeat1($.type_case_clause), $._outdent)),
// ---------------------------------------------------------------
// Types
_type: $ =>
choice(
$.function_type,
$.compound_type,
$.infix_type,
$.match_type,
$._annotated_type,
$.literal_type,
$._structural_type,
$.type_lambda,
),
_annotated_type: $ => prec.right(choice($.annotated_type, $._simple_type)),
annotated_type: $ => prec.right(seq($._simple_type, repeat1($.annotation))),
_simple_type: $ =>
prec.left(
PREC.type,
choice(
$.generic_type,
$.projected_type,
$.tuple_type,
$.named_tuple_type,
$.singleton_type,
$.stable_type_identifier,
$._type_identifier,
$.wildcard,
),
),
compound_type: $ =>
choice(
prec.left(
PREC.compound,
seq(
field("base", $._annotated_type),
repeat1(seq("with", field("extra", $._annotated_type))),
),
),
prec.left(seq(field("base", $._annotated_type), $._refinement)),
prec.left(
-1,
seq(
prec.left(
PREC.compound,
seq(
field("base", $._annotated_type),
repeat1(seq("with", field("extra", $._annotated_type))),
),
),
$._refinement,
),
),
),
_structural_type: $ =>
prec("structural_type", alias($.template_body, $.structural_type)),
_refinement: $ => alias($.template_body, $.refinement),
// This does not include _simple_type since _annotated_type covers it.
_infix_type_choice: $ =>
prec.left(
PREC.infix,
choice(
$.compound_type,
$.infix_type,
$._annotated_type,
$.literal_type,
),
),
infix_type: $ =>
prec.left(
PREC.infix,
seq(
field("left", $._infix_type_choice),
field("operator", $._identifier),
field("right", $._infix_type_choice),
),
),
tuple_type: $ => seq("(", trailingCommaSep1($._type), ")"),
named_tuple_type: $ => seq("(", trailingCommaSep1($.name_and_type), ")"),
singleton_type: $ =>
prec.left(
PREC.stable_type_id,
seq(choice($._identifier, $.stable_identifier), ".", "type"),
),
stable_type_identifier: $ =>
prec.left(
PREC.stable_type_id,
seq(
choice($._identifier, $.stable_identifier),
".",
$._type_identifier,
),
),
stable_identifier: $ =>
prec.left(
PREC.stable_id,
seq(choice($._identifier, $.stable_identifier), ".", $._identifier),
),
generic_type: $ =>
seq(
field("type", $._simple_type),
field("type_arguments", $.type_arguments),
),
projected_type: $ =>
seq(
field("type", $._simple_type),
"#",
field("selector", $._type_identifier),
),
match_type: $ =>
prec.left(seq($._infix_type_choice, "match", $._indented_type_cases)),
type_case_clause: $ =>
prec.left(
PREC.control,
seq("case", $._infix_type_choice, field("body", $._arrow_then_type)),
),
function_type: $ =>
prec.left(
choice(
seq(field("type_parameters", $.type_parameters), $._arrow_then_type),
seq(field("parameter_types", $.parameter_types), $._arrow_then_type),
),
),
_arrow_then_type: $ =>
prec.right(seq(choice("=>", "?=>"), field("return_type", $._type))),
// Deprioritize against typed_pattern._type.
parameter_types: $ =>
prec(
-1,
choice(
$._annotated_type,
// Prioritize a parenthesized param list over a single tuple_type.
prec.dynamic(1, seq("(", trailingCommaSep($._param_type), ")")),
$.compound_type,
$.infix_type,
),
),
_param_type: $ =>
choice($._type, $.lazy_parameter_type, $.repeated_parameter_type),
lazy_parameter_type: $ => seq("=>", field("type", $._type)),
repeated_parameter_type: $ =>
prec.left(PREC.postfix, seq(field("type", $._type), "*")),
_type_identifier: $ => alias($._identifier, $.type_identifier),
type_lambda: $ =>
seq(
"[",
trailingCommaSep1($._type_parameter),
"]",
"=>>",
field("return_type", $._type),
),
// ---------------------------------------------------------------
// Patterns
_pattern: $ =>
choice(
$._identifier,
$.stable_identifier,
$.interpolated_string_expression,
$.capture_pattern,
$.tuple_pattern,
$.named_tuple_pattern,
$.case_class_pattern,
$.infix_pattern,
$.alternative_pattern,
$.typed_pattern,
$.given_pattern,
$.quote_expression,
$.literal,
$.wildcard,
$.repeat_pattern,
),
case_class_pattern: $ =>
seq(
field("type", choice($._type_identifier, $.stable_type_identifier)),
"(",
choice(
field("pattern", trailingCommaSep($._pattern)),
field("pattern", trailingCommaSep($.named_pattern)),
),
")",
),
infix_pattern: $ =>
prec.left(
PREC.infix,
seq(
field("left", $._pattern),
field("operator", $._identifier),
field("right", $._pattern),
),
),
capture_pattern: $ =>
prec.right(
PREC.field,
seq(
field("name", choice($._identifier, $.wildcard)),
"@",
field("pattern", $._pattern),
),
),
repeat_pattern: $ => prec.right(seq(field("pattern", $._pattern), "*")),
typed_pattern: $ =>
prec.right(
-1,
seq(field("pattern", $._pattern), ":", field("type", $._type)),
),
given_pattern: $ => seq("given", field("type", $._type)),
// TODO: Flatten this.
alternative_pattern: $ => prec.left(-2, seq($._pattern, "|", $._pattern)),
tuple_pattern: $ => seq("(", trailingCommaSep1($._pattern), ")"),
named_pattern: $ => prec.left(-1, seq($._identifier, "=", $._pattern)),
named_tuple_pattern: $ => seq("(", trailingCommaSep1($.named_pattern), ")"),
// ---------------------------------------------------------------
// Expressions
expression: $ =>
choice(
$.if_expression,
$.match_expression,
$.try_expression,
$.assignment_expression,
$.lambda_expression,
$.postfix_expression,
$.ascription_expression,
$.infix_expression,
$.prefix_expression,
$.return_expression,
$.throw_expression,
$.while_expression,
$.do_while_expression,
$.for_expression,
$.macro_body,
$._simple_expression,
),
/**
* SimpleExpr ::= SimpleRef
* | Literal
* | '_'
* | BlockExpr
* | ExprSplice
* | Quoted
* | quoteId
* | 'new' ConstrApp {'with' ConstrApp} [TemplateBody]
* | 'new' TemplateBody
* | '(' ExprsInParens ')'
* | SimpleExpr '.' id
* | SimpleExpr '.' MatchClause
* | SimpleExpr TypeArgs
* | SimpleExpr ArgumentExprs
* | SimpleExpr ColonArgument
* TODO: ColonArgument
*/
_simple_expression: $ =>
choice(
$.identifier,
$.operator_identifier,
$.literal,
$.interpolated_string_expression,
$.unit,
$.tuple_expression,
$.wildcard,
$.block,
$.splice_expression,
$.case_block,
$.quote_expression,
$.instance_expression,
$.parenthesized_expression,
$.field_expression,
$.generic_function,
$.call_expression,
),
lambda_expression: $ =>
prec.right(
seq(
optional(seq(field("type_parameters", $.type_parameters), "=>")),
field(
"parameters",
choice(
$.bindings,
seq(optional("implicit"), $._identifier),
$.wildcard,
),
),
choice("=>", "?=>"),
$._indentable_expression,
),
),
/*
* ::= [‘inline’] ‘if’ ‘(’ Expr ‘)’ {nl} Expr [[semi] ‘else’ Expr]
* | [‘inline’] ‘if’ Expr ‘then’ Expr [[semi] ‘else’ Expr]
*/
if_expression: $ =>
seq(
optional($.inline_modifier),
"if",
field("condition", $._if_condition),
field("consequence", $._indentable_expression),
optional(
seq(
optional(";"),
"else",
field("alternative", $._indentable_expression),
),
),
),
// NOTE(susliko): _if_condition and its magic dynamic precedence were introduced as a fix to
// https://github.com/tree-sitter/tree-sitter-scala/issues/263 and
// https://github.com/tree-sitter/tree-sitter-scala/issues/342
// Neither do I understand why this works, nor have I found a better solution
_if_condition: $ =>
prec.dynamic(
4,
choice(
$.parenthesized_expression,
seq($._indentable_expression, "then"),
),
),
/*
* MatchClause ::= 'match' <<< CaseClauses >>>
*/
match_expression: $ =>
seq(
optional($.inline_modifier),
field("value", $.expression),
"match",
field("body", choice($.case_block, $.indented_cases)),
),
try_expression: $ =>
prec.right(
PREC.control,
seq(
"try",
field("body", $._indentable_expression),
optional($.catch_clause),
optional($.finally_clause),
),
),
/*
* Catches ::= 'catch' (Expr | ExprCaseClause)
*/
catch_clause: $ =>
prec.right(
seq("catch", choice($._indentable_expression, $._expr_case_clause)),
),
_expr_case_clause: $ =>
prec.left(seq("case", $._case_pattern, field("body", $.expression))),
finally_clause: $ => prec.right(seq("finally", $._indentable_expression)),
/*
* Binding ::= (id | ‘_’) [‘:’ Type]
*/
binding: $ =>
prec.dynamic(
PREC.binding,
seq(
choice(field("name", $._identifier), $.wildcard),
optional(seq(":", field("type", $._param_type))),
),
),
bindings: $ => seq("(", trailingCommaSep($.binding), ")"),
case_block: $ =>
choice(prec(-1, seq("{", "}")), seq("{", repeat1($.case_clause), "}")),
case_clause: $ =>
prec.left(
seq("case", $._case_pattern, field("body", optional($._block))),
),
// This is created to capture guard from the right
_case_pattern: $ =>
prec.right(
10,
seq(field("pattern", $._pattern), optional($.guard), "=>"),
),
guard: $ =>
prec.left(
PREC.control,
seq("if", field("condition", $._postfix_expression_choice)),
),
assignment_expression: $ =>
prec.right(
PREC.assign,
seq(
field("left", choice($.prefix_expression, $._simple_expression)),
"=",
field("right", $.expression),
),
),
generic_function: $ =>
prec(
PREC.call,
seq(
field("function", $.expression),
field("type_arguments", $.type_arguments),
),
),
call_expression: $ =>
choice(
prec.left(
PREC.call,
seq(
field("function", $._simple_expression),
field("arguments", choice($.arguments, $.case_block, $.block)),
),
),
prec.right(
PREC.colon_call,
seq(
field("function", $._postfix_expression_choice),
":",
field("arguments", $.colon_argument),
),
),
),
/**
* ColonArgument ::= colon [LambdaStart]
* (CaseClauses | Block)
*/
colon_argument: $ =>
prec.left(
PREC.colon_call,
seq(
optional(
field(
"lambda_start",
seq(choice($.bindings, $._identifier, $.wildcard), "=>"),
),
),
choice($.indented_block, $.indented_cases),
),
),
field_expression: $ =>
prec.left(
PREC.field,
seq(
field("value", $._simple_expression),
".",
field("field", $._identifier),
),
),
/**
* SimpleExpr ::= SimpleRef
* | 'new' ConstrApp {'with' ConstrApp} [TemplateBody]
* | 'new' TemplateBody
*/
instance_expression: $ =>
choice(
// This is weakened so ascription wins for new Array: Array
prec.dynamic(
0,
seq("new", $._constructor_application, $.template_body),
),
prec("new", seq("new", $.template_body)),
seq("new", $._constructor_application),
),
/**
* PostfixExpr [Ascription]
*/
ascription_expression: $ =>
prec.dynamic(
PREC.ascription,
seq(
$._postfix_expression_choice,
":",
choice($._param_type, $.annotation),
),
),
infix_expression: $ =>
prec.left(
PREC.infix,
seq(
field(
"left",
choice(
$.infix_expression,
$.prefix_expression,
$._simple_expression,
),
),
field("operator", $._identifier),
field(
"right",
choice(
$.prefix_expression,
$._simple_expression,
seq(":", $.colon_argument),
),
),
),
),
/**
* PostfixExpr ::= InfixExpr [id]
*/
postfix_expression: $ =>
prec.left(
PREC.postfix,
seq(
choice($.infix_expression, $.prefix_expression, $._simple_expression),
$._identifier,
),
),
_postfix_expression_choice: $ =>
prec.left(
PREC.postfix,
choice(
$.postfix_expression,
$.infix_expression,
$.prefix_expression,
$._simple_expression,
),
),
macro_body: $ =>
prec.left(
PREC.macro,
seq(
"macro",
choice($.infix_expression, $.prefix_expression, $._simple_expression),
),
),
/**
* PrefixExpr ::= [PrefixOperator] SimpleExpr
*/
prefix_expression: $ =>
prec(PREC.prefix, seq(choice("+", "-", "!", "~"), $._simple_expression)),
tuple_expression: $ =>
seq(
"(",
$.expression,
repeat1(seq(",", $.expression)),
optional(","),
")",
),
parenthesized_expression: $ => seq("(", $.expression, ")"),
type_arguments: $ => seq("[", trailingCommaSep1($._type), "]"),
arguments: $ =>
seq(
"(",
choice(optional($._exprs_in_parens), seq("using", $._exprs_in_parens)),
")",
),
// ExprsInParens ::= ExprInParens {‘,’ ExprInParens}
_exprs_in_parens: $ => trailingCommaSep1($.expression),
splice_expression: $ =>
prec.left(
PREC.macro,
seq(
"$",
choice(
seq("{", $._block, "}"),
seq("[", $._type, "]"),
// TODO: This would never hit, since identifier permits $ sign
$.identifier,
),
),
),
quote_expression: $ =>
prec.left(
PREC.macro,
seq(
"'",
choice(seq("{", $._block, "}"), seq("[", $._type, "]"), $.identifier),
),
),
/**
* id ::= plainid
* | ‘`’ { charNoBackQuoteOrNewline | UnicodeEscape | charEscapeSeq
*/
identifier: $ =>
prec.left(
choice($._alpha_identifier, $._backquoted_id, $._soft_identifier),
),
// https://docs.scala-lang.org/scala3/reference/soft-modifier.html
_soft_identifier: $ =>
prec(
"soft_id",
choice("infix", "inline", "opaque", "open", "transparent", "end"),
),
/**
* alphaid ::= upper idrest
* | varid
* We approximate the above as:
* /[A-Za-z\$_][A-Z\$_a-z0-9]*(_[\-!#%&*+\/\\:<=>?@\u005e\u007c~]+)?/,
*
* The following is more accurate, but the state count goes over the unsigned short int, and should be comparable.
* /([\p{Lu}\p{Lt}\p{Nl}\p{Lo}\p{Lm}\$][\p{Lu}\p{Lt}\p{Nl}\p{Lo}\p{Lm}\$\p{Ll}_\u00AA\u00BB\u02B0-\u02B8\u02C0-\u02C1\u02E0-\u02E4\u037A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\uA69C-\uA69D\uA770\uA7F8-\uA7F9\uAB5C-\uAB5F0-9]*(_[\-!#%&*+\/\\:<=>?@\u005e\u007c~]+)?|[\p{Ll}_\u00AA\u00BB\u02B0-\u02B8\u02C0-\u02C1\u02E0-\u02E4\u037A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\uA69C-\uA69D\uA770\uA7F8-\uA7F9\uAB5C-\uAB5F_][\p{Lu}\p{Lt}\p{Nl}\p{Lo}\p{Lm}\$\p{Ll}_\u00AA\u00BB\u02B0-\u02B8\u02C0-\u02C1\u02E0-\u02E4\u037A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\uA69C-\uA69D\uA770\uA7F8-\uA7F9\uAB5C-\uAB5F0-9]*(_[\-!#%&*+/\\:<=>?@\u005e\u007c~]+)?|[\-!#%&*+\/\\:<=>?@\u005e\u007c~]+)|[\p{Ll}_\u00AA\u00BB\u02B0-\u02B8\u02C0-\u02C1\u02E0-\u02E4\u037A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\uA69C-\uA69D\uA770\uA7F8-\uA7F9\uAB5C-\uAB5F_][\p{Lu}\p{Lt}\p{Nl}\p{Lo}\p{Lm}\$\p{Ll}_\u00AA\u00BB\u02B0-\u02B8\u02C0-\u02C1\u02E0-\u02E4\u037A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\uA69C-\uA69D\uA770\uA7F8-\uA7F9\uAB5C-\uAB5F0-9]*(_[\-!#%&*+\/\\:<=>?@\u005e\u007c~]+)?/,
*/
_alpha_identifier: $ =>
/[\p{Lu}\p{Lt}\p{Nl}\p{Lo}\p{Lm}\$\p{Ll}_\u00AA\u00BB\u02B0-\u02B8\u02C0-\u02C1\u02E0-\u02E4\u037A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\uA69C-\uA69D\uA770\uA7F8-\uA7F9\uAB5C-\uAB5F\$][\p{Lu}\p{Lt}\p{Nl}\p{Lo}\p{Lm}\$\p{Ll}_\u00AA\u00BB\u02B0-\u02B8\u02C0-\u02C1\u02E0-\u02E4\u037A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\uA69C-\uA69D\uA770\uA7F8-\uA7F9\uAB5C-\uAB5F0-9\$_\p{Ll}]*(_[\-!#%&*+\/\\:<=>?@\u005e\u007c~\p{Sm}\p{So}]+)?/,
/**
* Despite what the lexical syntax suggests, the alphaid rule doesn't apply
* to identifiers that aren't in blocks in interpolated strings (e.g. $foo).
* A more accurate description is given in
* https://www.scala-lang.org/files/archive/spec/2.13/01-lexical-syntax.html
* where it states (regarding dollar sign escapes in interpolated strings) that
* """
* The simpler form consists of a ‘$’-sign followed by an identifier starting
* with a letter and followed only by letters, digits, and underscore characters
* """
* where "letters" does not include the $ character.
*
* This rule is similar to the _alpha_identifier rule, with the differences
* being that the $ character is excluded, along with the _(operator_chars)
* suffix and can be approximated as
* /[A-Za-z_][A-Z_a-z0-9]/;
*/
_interpolation_identifier: $ =>
/[\p{Lu}\p{Lt}\p{Nl}\p{Lo}\p{Lm}\p{Ll}_\u00AA\u00BB\u02B0-\u02B8\u02C0-\u02C1\u02E0-\u02E4\u037A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\uA69C-\uA69D\uA770\uA7F8-\uA7F9\uAB5C-\uAB5F][\p{Lu}\p{Lt}\p{Nl}\p{Lo}\p{Lm}\p{Ll}_\u00AA\u00BB\u02B0-\u02B8\u02C0-\u02C1\u02E0-\u02E4\u037A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\uA69C-\uA69D\uA770\uA7F8-\uA7F9\uAB5C-\uAB5F0-9_\p{Ll}]*/,
_backquoted_id: $ => /`[^\n`]+`/,
_identifier: $ => choice($.identifier, $.operator_identifier),
identifiers: $ => seq($.identifier, ",", commaSep1($.identifier)),
wildcard: $ => "_",
/**
* Regex patterns created to avoid matching // comments and /* comment starts.
* This could technically match illegal tokens such as val ?// = 1
*/
operator_identifier: $ =>
token(
choice(
// opchar minus colon, equal, at
// Technically speaking, Sm (Math symbols https://www.compart.com/en/unicode/category/Sm)
// should be allowed as a single-character opchar, however, it includes `=`,
// so we should to avoid that to prevent bad parsing of `=` as infix term or type.
/[\-!#%&*+\/\\<>?\u005e\u007c~\u00ac\u00b1\u00d7\u00f7\u2190-\u2194\p{So}]/,
seq(
// opchar minus slash
/[\-!#%&*+\\:<=>?@\u005e\u007c~\p{Sm}\p{So}]/,
// opchar*
repeat1(/[\-!#%&*+\/\\:<=>?@\u005e\u007c~\p{Sm}\p{So}]/),
),
seq(
// opchar
/[\-!#%&*+\/\\:<=>?@\u005e\u007c~\p{Sm}\p{So}]/,
// opchar minus slash and asterisk
/[\-!#%&+\\:<=>?@\u005e\u007c~\p{Sm}\p{So}]/,
// opchar*
repeat(/[\-!#%&*+\/\\:<=>?@\u005e\u007c~\p{Sm}\p{So}]/),
),
),
),
_non_null_literal: $ =>
choice(
$.integer_literal,
$.floating_point_literal,
$.boolean_literal,
$.character_literal,
$.string,
),
literal_type: $ => prec.left(PREC.type, $._non_null_literal),
literal: $ => choice($._non_null_literal, $.null_literal),
integer_literal: $ =>
token(
seq(
optional(/[-]/),
choice(/[\d](_?\d)*/, /0[xX][\da-fA-F](_?[\da-fA-F])*/),
optional(/[lL]/),
),
),
floating_point_literal: $ =>
token(
seq(
optional(/[-]/),
choice(
// digit {digit} ‘.’ digit {digit} [exponentPart] [floatType]
seq(/[\d]+\.[\d]+/, optional(/[eE][+-]?[\d]+/), optional(/[dfDF]/)),
// ‘.’ digit {digit} [exponentPart] [floatType]
seq(/\.[\d]+/, optional(/[eE][+-]?[\d]+/), optional(/[dfDF]/)),
// digit {digit} exponentPart [floatType]
seq(/[\d]+/, /[eE][+-]?[\d]+/, optional(/[dfDF]/)),
// digit {digit} [exponentPart] floatType
seq(/[\d]+/, optional(/[eE][+-]?[\d]+/), /[dfDF]/),
),
),
),
boolean_literal: $ => choice("true", "false"),
character_literal: $ =>
token(
seq(
"'",
optional(
choice(
seq(
"\\",
choice(/[^xu]/, /[uU]+[0-9a-fA-F]{4}/, /x[0-9a-fA-F]{2}/),
),
/[^\\'\n]/,
),
),
"'",
),
),
interpolated_string_expression: $ =>
choice(
seq(
field("interpolator", alias($._raw_string_start, $.identifier)),
alias($._raw_string, $.interpolated_string),
),
seq(field("interpolator", $.identifier), $.interpolated_string),
),
_dollar_escape: $ =>
alias(token(seq("$", choice("$", '"'))), $.escape_sequence),
_aliased_interpolation_identifier: $ =>
alias($._interpolation_identifier, $.identifier),
interpolation: $ =>
seq("$", choice($._aliased_interpolation_identifier, $.block)),
interpolated_string: $ =>
choice(
seq(
token.immediate('"'),
repeat(
seq(
$._interpolated_string_middle,
choice($._dollar_escape, $.interpolation, $.escape_sequence),
),
),
$._single_line_string_end,
),
seq(
token.immediate('"""'),
repeat(
seq(
$._interpolated_multiline_string_middle,
// Multiline strings ignore escape sequences
choice($._dollar_escape, $.interpolation),
),
),
$._multiline_string_end,
),
),
// We need to handle single-line raw strings separately from interpolated strings,
// because raw strings are not parsed for escape sequences. For example, raw strings
// are often used for regular expressions, which contain backslashes that would
// be invalid if parsed as escape sequences. We do not special case multiline
// raw strings, because multiline strings do not parse escape sequences anyway.
// Scala handles multiline raw strings identically to other multiline interpolated,
// so we could parse them as interpolated strings, but I think the code is cleaner
// if we maintain the distinction.
_raw_string: $ =>
choice(
seq(
$._simple_string_start,
seq(
repeat(
seq(
$._raw_string_middle,
choice($._dollar_escape, $.interpolation),
),
),
$._single_line_string_end,
),
),
seq(
$._simple_multiline_string_start,
repeat(
seq(
$._raw_string_multiline_middle,
choice($._dollar_escape, $.interpolation),
),
),
$._multiline_string_end,
),
),
escape_sequence: _ =>
token.immediate(
seq(
"\\",
choice(
/[tbnrf"'\\]/,
// The Java spec allows any number of u's and U's at the start of a unicode escape.
/[uU]+[0-9a-fA-F]{4}/,
// Octals are not allowed in Scala 3, but are allowed in Scala 2. tree-sitter
// does not have a mechanism for distinguishing between different versions of a
// language, so I think it makes sense to allow them. Maybe in the future we
// should move them to a `deprecated` syntax node?
/[0-3]?[0-7]{1,2}/,
),
),
),
string: $ =>
choice(
seq(
$._simple_string_start,
repeat(seq($._simple_string_middle, $.escape_sequence)),
$._single_line_string_end,
),
seq(
$._simple_multiline_string_start,
/// Multiline strings ignore escape sequences
$._multiline_string_end,
),
),
_semicolon: $ => choice(";", $._automatic_semicolon),
null_literal: $ => "null",
unit: $ => prec(PREC.unit, seq("(", ")")),
return_expression: $ => prec.left(seq("return", optional($.expression))),
throw_expression: $ => prec.left(seq("throw", $.expression)),
/*
* Expr1 ::= 'while' '(' Expr ')' {nl} Expr
* | 'while' Expr 'do' Expr
*/
while_expression: $ =>
prec(
PREC.while,
choice(
prec.right(
seq(
"while",
field("condition", $.parenthesized_expression),
field("body", $.expression),
),
),
prec.right(
seq(
"while",
field("condition", seq($._indentable_expression, "do")),
field("body", $._indentable_expression),
),
),
),
),
do_while_expression: $ =>
prec.right(
seq(
"do",
field("body", $.expression),
"while",
field("condition", $.parenthesized_expression),
),
),
/*
* ForExpr ::= 'for' '(' Enumerators0 ')' {nl} ['do' | 'yield'] Expr
* | 'for' '{' Enumerators0 '}' {nl} ['do' | 'yield'] Expr
* | 'for' Enumerators0 ('do' | 'yield') Expr
*/
for_expression: $ =>
choice(
prec.right(
PREC.control,
seq(
"for",
field(
"enumerators",
choice(
seq("(", $.enumerators, ")"),
seq("{", $.enumerators, "}"),
),
),
choice(
seq(field("body", $.expression)),
seq("yield", field("body", $._indentable_expression)),
),
),
),
prec.right(
PREC.control,
seq(
"for",
field("enumerators", $.enumerators),
choice(
seq("do", field("body", $._indentable_expression)),
seq("yield", field("body", $._indentable_expression)),
),
),
),
),
enumerators: $ =>
choice(
seq(sep1($._semicolon, $.enumerator), optional($._automatic_semicolon)),
seq(
$._indent,
sep1($._semicolon, $.enumerator),
optional($._automatic_semicolon),
$._outdent,
),
),
/**
* Enumerator ::= Generator
* | Guard {Guard}
* | Pattern1 '=' Expr
*/
enumerator: $ =>
choice(
seq(
optional("case"),
$._pattern,
choice("<-", "="),
$.expression,
optional($.guard),
),
repeat1($.guard),
),
_shebang: $ => alias(token(seq("#!", /.*/)), $.comment),
comment: $ => seq(token("//"), choice($.using_directive, $._comment_text)),
_comment_text: $ => token(prec(PREC.comment, /.*/)),
using_directive: $ =>
seq(
token.immediate(prec(PREC.using_directive, ">")),
token("using"),
$.using_directive_key,
$.using_directive_value,
),
using_directive_key: $ => token(/[^\s]+/),
using_directive_value: $ => token(/.*/),
block_comment: $ =>
seq(token("/*"), repeat(choice(token(/./), token("//"))), token("*/")),
},
});
function commaSep(rule) {
return optional(commaSep1(rule));
}
function commaSep1(rule) {
return sep1(",", rule);
}
function trailingCommaSep(rule) {
return optional(trailingCommaSep1(rule));
}
function trailingCommaSep1(rule) {
return trailingSep1(",", rule);
}
function trailingSep1(delimiter, rule) {
return seq(sep1(delimiter, rule), optional(delimiter));
}
function sep1(delimiter, rule) {
return seq(rule, repeat(seq(delimiter, rule)));
}