// One or more whitespace characters. Used in the grammar's `extras` list, so
// runs of these characters are skipped between tokens. Besides `\s`, the class
// explicitly lists U+0085 (next line), U+00A0 (no-break space), U+1680,
// U+2000–U+200B, U+2028/U+2029 (line/paragraph separator), U+202F, U+205F,
// U+3000 (ideographic space) and U+FEFF (byte order mark).
// The same code points are repeated by hand in the negated character classes
// of `attribute_name` and `attribute_value`; keep the three lists in sync.
const WHITESPACE = /[\s\u0085\u00a0\u1680\u2000-\u200b\u2028\u2029\u202f\u205f\u3000\ufeff]+/;
module.exports = grammar({
name: 'html',
extras: $ => [
$.comment,
WHITESPACE,
],
externals: $ => [
$._start_tag_name,
$._raw_text_start_tag_name,
$._end_tag_name,
$.erroneous_end_tag_name,
'/>',
$._implicit_end_tag,
$.raw_text,
$.comment,
$.text,
],
rules: {
document: $ => repeat($._node),
doctype: $ => seq(
'<!',
alias($._doctype, 'doctype'),
/[^>]+/,
'>',
),
_doctype: _ => /[Dd][Oo][Cc][Tt][Yy][Pp][Ee]/,
_node: $ => choice(
$.doctype,
$.entity,
$.text,
$.element,
$.erroneous_end_tag,
),
element: $ => choice(
seq(
$.start_tag,
repeat($._node),
choice($.end_tag, $._implicit_end_tag),
),
$._raw_text_element,
$.self_closing_tag,
),
_raw_text_element: $ => seq(
alias($._raw_text_start_tag, $.start_tag),
optional($.raw_text),
$.end_tag,
),
start_tag: $ => seq(
'<',
field('name', alias($._start_tag_name, $.tag_name)),
repeat($.attribute),
'>',
),
_raw_text_start_tag: $ => seq(
'<',
field('name', alias($._raw_text_start_tag_name, $.tag_name)),
repeat($.attribute),
'>',
),
self_closing_tag: $ => seq(
'<',
field('name', alias($._start_tag_name, $.tag_name)),
repeat($.attribute),
'/>',
),
end_tag: $ => seq(
'</',
field('name', alias($._end_tag_name, $.tag_name)),
'>',
),
erroneous_end_tag: $ => seq(
'</',
$.erroneous_end_tag_name,
'>',
),
attribute: $ => seq(
$.attribute_name,
optional(seq(
'=',
choice(
$.attribute_value,
$.quoted_attribute_value,
),
)),
),
attribute_name: _ => /[^<>"'/=\s\u0085\u00a0\u1680\u2000-\u200b\u2028\u2029\u202f\u205f\u3000\ufeff]+/,
attribute_value: _ => /[^<>"'=\s\u0085\u00a0\u1680\u2000-\u200b\u2028\u2029\u202f\u205f\u3000\ufeff`]+/,
quoted_attribute_value: $ => choice(
seq('\'', optional(alias(/[^']+/, $.attribute_value)), '\''),
seq('"', optional(alias(/[^"]+/, $.attribute_value)), '"'),
),
entity: _ => /&(#([xX][0-9a-fA-F]{1,6}|[0-9]{1,7})|[A-Za-z][A-Za-z0-9]*);?/,
},
});