use crate::event::Name;
use crate::message;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::{
char::{
after_index as char_after_index, format_byte, format_opt as format_char_opt,
kind_after_index, Kind as CharacterKind,
},
identifier::{id_cont, id_start},
};
use alloc::{boxed::Box, format};
use core::str;
/// Start of an MDX JSX tag, at the opening `<`.
///
/// Opens the outer tag token (`tokenize_state.token_1`, set by the caller to
/// the flow or text variant) and consumes the `<` marker.
pub fn start(tokenizer: &mut Tokenizer) -> State {
    debug_assert_eq!(tokenizer.current, Some(b'<'));
    tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
    tokenizer.enter(Name::MdxJsxTagMarker);
    tokenizer.consume();
    tokenizer.exit(Name::MdxJsxTagMarker);
    State::Next(StateName::MdxJsxStartAfter)
}
/// Directly after the opening `<`.
///
/// A tab, line ending, or space here rejects the construct (`State::Nok`);
/// otherwise optional ES whitespace is skipped before the name.
pub fn start_after(tokenizer: &mut Tokenizer) -> State {
    // Markdown whitespace straight after `<` means this is not a JSX tag.
    if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) {
        State::Nok
    } else {
        tokenizer.attempt(State::Next(StateName::MdxJsxNameBefore), State::Nok);
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
}
/// After `<` (and optional ES whitespace), before a name, closing slash, or
/// fragment end.
pub fn name_before(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Closing tag: `</`.
        Some(b'/') => {
            tokenizer.enter(Name::MdxJsxTagClosingMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagClosingMarker);
            tokenizer.attempt(
                State::Next(StateName::MdxJsxClosingTagNameBefore),
                State::Nok,
            );
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        // Opening fragment: `<>`.
        Some(b'>') => State::Retry(StateName::MdxJsxTagEnd),
        _ => {
            // A character that can start an ES identifier begins the
            // primary name.
            if id_start_opt(char_after_index(
                tokenizer.parse_state.bytes,
                tokenizer.point.index,
            )) {
                tokenizer.enter(Name::MdxJsxTagName);
                tokenizer.enter(Name::MdxJsxTagNamePrimary);
                tokenizer.consume();
                State::Next(StateName::MdxJsxPrimaryName)
            } else {
                crash(
                    tokenizer,
                    "before name",
                    &format!(
                        "a character that can start a name, such as a letter, `$`, or `_`{}",
                        // `<!` looks like an HTML comment; hint at the MDX form.
                        if tokenizer.current == Some(b'!') {
                            " (note: to create a comment in MDX, use `{/* text */}`)"
                        } else {
                            ""
                        }
                    ),
                )
            }
        }
    }
}
/// After `</` (and optional ES whitespace), before the name of a closing tag,
/// or at `>` for a closing fragment (`</>`).
pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State {
    // Closing fragment: `</>`.
    if let Some(b'>') = tokenizer.current {
        State::Retry(StateName::MdxJsxTagEnd)
    }
    // A character that can start an ES identifier begins the primary name.
    else if id_start_opt(char_after_index(
        tokenizer.parse_state.bytes,
        tokenizer.point.index,
    )) {
        tokenizer.enter(Name::MdxJsxTagName);
        tokenizer.enter(Name::MdxJsxTagNamePrimary);
        tokenizer.consume();
        State::Next(StateName::MdxJsxPrimaryName)
    } else {
        crash(
            tokenizer,
            "before name",
            &format!(
                "a character that can start a name, such as a letter, `$`, or `_`{}",
                // Fix: this previously compared with `Some(b'*' | b'/')`,
                // which bitwise-ORs the bytes (`b'*' | b'/' == b'/'`), so the
                // JS-comment hint never showed for `*`. A pattern match tests
                // both bytes as intended.
                if matches!(tokenizer.current, Some(b'*' | b'/')) {
                    " (note: JS comments in JSX tags are not supported in MDX)"
                } else {
                    ""
                }
            ),
        )
    }
}
/// Inside the primary name of a tag.
pub fn primary_name(tokenizer: &mut Tokenizer) -> State {
    // End of the primary name: whitespace, or a marker for a member (`.`),
    // prefix (`:`), attribute expression (`{`), or tag end (`/`, `>`).
    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
        == CharacterKind::Whitespace
        || matches!(tokenizer.current, Some(b'.' | b'/' | b':' | b'>' | b'{'))
    {
        tokenizer.exit(Name::MdxJsxTagNamePrimary);
        tokenizer.attempt(State::Next(StateName::MdxJsxPrimaryNameAfter), State::Nok);
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
    // Continuation byte of a multi-byte UTF-8 sequence, or a character that
    // can continue an ES identifier.
    else if matches!(tokenizer.current, Some(0x80..=0xBF))
        || id_cont_opt(char_after_index(
            tokenizer.parse_state.bytes,
            tokenizer.point.index,
        ))
    {
        tokenizer.consume();
        State::Next(StateName::MdxJsxPrimaryName)
    } else {
        crash(
            tokenizer,
            "in name",
            &format!(
                "a name character such as letters, digits, `$`, or `_`; whitespace before attributes; or the end of the tag{}",
                // `@` suggests an attempted e-mail/autolink; hint at MDX links.
                if tokenizer.current == Some(b'@') {
                    " (note: to create a link in MDX, use `[text](url)`)"
                } else {
                    ""
                }
            ),
        )
    }
}
/// After the primary name of a tag (and optional ES whitespace).
pub fn primary_name_after(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // `.` starts a member name, as in `<a.b>`.
        Some(b'.') => {
            tokenizer.enter(Name::MdxJsxTagNameMemberMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagNameMemberMarker);
            tokenizer.attempt(State::Next(StateName::MdxJsxMemberNameBefore), State::Nok);
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        // `:` starts a local name, as in `<a:b>`.
        Some(b':') => {
            tokenizer.enter(Name::MdxJsxTagNamePrefixMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagNamePrefixMarker);
            tokenizer.attempt(State::Next(StateName::MdxJsxLocalNameBefore), State::Nok);
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        _ => {
            // End of the tag name: tag end (`/`, `>`), an attribute
            // expression (`{`), or the start of an attribute name.
            if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
                || id_start_opt(char_after_index(
                    tokenizer.parse_state.bytes,
                    tokenizer.point.index,
                ))
            {
                tokenizer.exit(Name::MdxJsxTagName);
                State::Retry(StateName::MdxJsxAttributeBefore)
            } else {
                crash(
                    tokenizer,
                    "after name",
                    "a character that can start an attribute name, such as a letter, `$`, or `_`; whitespace before attributes; or the end of the tag"
                )
            }
        }
    }
}
/// After a member marker `.` (and optional ES whitespace), before a member
/// name.
pub fn member_name_before(tokenizer: &mut Tokenizer) -> State {
    let head = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index);
    // Anything that cannot start an ES identifier is a fatal parse error.
    if !id_start_opt(head) {
        return crash(
            tokenizer,
            "before member name",
            "a character that can start an attribute name, such as a letter, `$`, or `_`; whitespace before attributes; or the end of the tag"
        );
    }
    tokenizer.enter(Name::MdxJsxTagNameMember);
    tokenizer.consume();
    State::Next(StateName::MdxJsxMemberName)
}
/// Inside a member name of a tag, as in `<a.b>`.
pub fn member_name(tokenizer: &mut Tokenizer) -> State {
    // End of the member name: whitespace, another member (`.`), an attribute
    // expression (`{`), or tag end (`/`, `>`).
    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
        == CharacterKind::Whitespace
        || matches!(tokenizer.current, Some(b'.' | b'/' | b'>' | b'{'))
    {
        tokenizer.exit(Name::MdxJsxTagNameMember);
        tokenizer.attempt(State::Next(StateName::MdxJsxMemberNameAfter), State::Nok);
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
    // Continuation byte of a multi-byte UTF-8 sequence, or a character that
    // can continue an ES identifier.
    else if matches!(tokenizer.current, Some(0x80..=0xBF))
        || id_cont_opt(char_after_index(
            tokenizer.parse_state.bytes,
            tokenizer.point.index,
        ))
    {
        tokenizer.consume();
        State::Next(StateName::MdxJsxMemberName)
    } else {
        crash(
            tokenizer,
            "in member name",
            &format!(
                "a name character such as letters, digits, `$`, or `_`; whitespace before attributes; or the end of the tag{}",
                // `@` suggests an attempted e-mail/autolink; hint at MDX links.
                if tokenizer.current == Some(b'@') {
                    " (note: to create a link in MDX, use `[text](url)`)"
                } else {
                    ""
                }
            ),
        )
    }
}
/// After a member name (and optional ES whitespace).
pub fn member_name_after(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Another `.` starts a further member name, as in `<a.b.c>`.
        Some(b'.') => {
            tokenizer.enter(Name::MdxJsxTagNameMemberMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagNameMemberMarker);
            tokenizer.attempt(State::Next(StateName::MdxJsxMemberNameBefore), State::Nok);
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        _ => {
            // End of the tag name: tag end (`/`, `>`), an attribute
            // expression (`{`), or the start of an attribute name.
            if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
                || id_start_opt(char_after_index(
                    tokenizer.parse_state.bytes,
                    tokenizer.point.index,
                ))
            {
                tokenizer.exit(Name::MdxJsxTagName);
                State::Retry(StateName::MdxJsxAttributeBefore)
            } else {
                crash(
                    tokenizer,
                    "after member name",
                    "a character that can start an attribute name, such as a letter, `$`, or `_`; whitespace before attributes; or the end of the tag"
                )
            }
        }
    }
}
/// After a prefix marker `:` (and optional ES whitespace), before a local
/// name, as in `<a:b>`.
pub fn local_name_before(tokenizer: &mut Tokenizer) -> State {
    // A character that can start an ES identifier begins the local name.
    if id_start_opt(char_after_index(
        tokenizer.parse_state.bytes,
        tokenizer.point.index,
    )) {
        tokenizer.enter(Name::MdxJsxTagNameLocal);
        tokenizer.consume();
        State::Next(StateName::MdxJsxLocalName)
    } else {
        crash(
            tokenizer,
            "before local name",
            &format!(
                "a character that can start a name, such as a letter, `$`, or `_`{}",
                // URL-ish bytes after `:` (`+`, `/`..`9`, i.e. slash and
                // digits) suggest an attempted protocol link; hint at MDX
                // link syntax.
                if matches!(tokenizer.current, Some(b'+' | b'/'..=b'9')) {
                    " (note: to create a link in MDX, use `[text](url)`)"
                } else {
                    ""
                }
            ),
        )
    }
}
/// Inside a local name of a tag, as in `<a:b>`.
pub fn local_name(tokenizer: &mut Tokenizer) -> State {
    // End of the local name: whitespace, an attribute expression (`{`), or
    // tag end (`/`, `>`).
    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
        == CharacterKind::Whitespace
        || matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
    {
        tokenizer.exit(Name::MdxJsxTagNameLocal);
        tokenizer.attempt(State::Next(StateName::MdxJsxLocalNameAfter), State::Nok);
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
    // Continuation byte of a multi-byte UTF-8 sequence, or a character that
    // can continue an ES identifier.
    else if matches!(tokenizer.current, Some(0x80..=0xBF))
        || id_cont_opt(char_after_index(
            tokenizer.parse_state.bytes,
            tokenizer.point.index,
        ))
    {
        tokenizer.consume();
        State::Next(StateName::MdxJsxLocalName)
    } else {
        crash(
            tokenizer,
            "in local name",
            "a name character such as letters, digits, `$`, or `_`; whitespace before attributes; or the end of the tag"
        )
    }
}
/// After a local name (and optional ES whitespace): the tag name is done.
pub fn local_name_after(tokenizer: &mut Tokenizer) -> State {
    // Tag end (`/`, `>`) or an attribute expression (`{`) may follow, …
    let at_tag_rest = matches!(tokenizer.current, Some(b'/' | b'>' | b'{'));
    // … as may the start of an attribute name. (`char_after_index` is a pure
    // read, so evaluating it unconditionally does not change behavior.)
    let at_attribute_start = id_start_opt(char_after_index(
        tokenizer.parse_state.bytes,
        tokenizer.point.index,
    ));
    if at_tag_rest || at_attribute_start {
        tokenizer.exit(Name::MdxJsxTagName);
        State::Retry(StateName::MdxJsxAttributeBefore)
    } else {
        crash(
            tokenizer,
            "after local name",
            "a character that can start an attribute name, such as a letter, `$`, or `_`; whitespace before attributes; or the end of the tag"
        )
    }
}
/// After the tag name or a previous attribute (and optional ES whitespace),
/// before an attribute, self-closing slash, or tag end.
pub fn attribute_before(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Self-closing slash: `<a/>`.
        Some(b'/') => {
            tokenizer.enter(Name::MdxJsxTagSelfClosingMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagSelfClosingMarker);
            tokenizer.attempt(State::Next(StateName::MdxJsxSelfClosing), State::Nok);
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        // End of the tag.
        Some(b'>') => State::Retry(StateName::MdxJsxTagEnd),
        // Attribute expression: `<a {...b}>`.
        Some(b'{') => {
            // Stash the tag token in `token_2` so the expression construct
            // can use `token_1`; restored in `attribute_expression_after`.
            tokenizer.tokenize_state.token_2 = tokenizer.tokenize_state.token_1.clone();
            tokenizer.tokenize_state.token_1 = Name::MdxJsxTagAttributeExpression;
            tokenizer.attempt(
                State::Next(StateName::MdxJsxAttributeExpressionAfter),
                State::Nok,
            );
            State::Retry(StateName::MdxExpressionStart)
        }
        _ => {
            // A character that can start an ES identifier begins an
            // attribute name.
            if id_start_opt(char_after_index(
                tokenizer.parse_state.bytes,
                tokenizer.point.index,
            )) {
                tokenizer.enter(Name::MdxJsxTagAttribute);
                tokenizer.enter(Name::MdxJsxTagAttributeName);
                tokenizer.enter(Name::MdxJsxTagAttributePrimaryName);
                tokenizer.consume();
                State::Next(StateName::MdxJsxAttributePrimaryName)
            } else {
                crash(
                    tokenizer,
                    "before attribute name",
                    "a character that can start an attribute name, such as a letter, `$`, or `_`; whitespace before attributes; or the end of the tag"
                )
            }
        }
    }
}
/// After an attribute expression (`{...}`).
///
/// Restores the tag token stashed in `token_2` back into `token_1`, then
/// continues to the next attribute after optional ES whitespace.
pub fn attribute_expression_after(tokenizer: &mut Tokenizer) -> State {
    // Move the stashed tag token back, leaving `Name::Data` as the
    // placeholder value in `token_2`.
    tokenizer.tokenize_state.token_1 =
        core::mem::replace(&mut tokenizer.tokenize_state.token_2, Name::Data);
    tokenizer.attempt(State::Next(StateName::MdxJsxAttributeBefore), State::Nok);
    State::Retry(StateName::MdxJsxEsWhitespaceStart)
}
/// Inside the primary name of an attribute, as in `<a b>`.
pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State {
    // End of the attribute name: whitespace, a prefix (`:`), an initializer
    // (`=`), an attribute expression (`{`), or tag end (`/`, `>`).
    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
        == CharacterKind::Whitespace
        || matches!(tokenizer.current, Some(b'/' | b':' | b'=' | b'>' | b'{'))
    {
        tokenizer.exit(Name::MdxJsxTagAttributePrimaryName);
        tokenizer.attempt(
            State::Next(StateName::MdxJsxAttributePrimaryNameAfter),
            State::Nok,
        );
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
    // Continuation byte of a multi-byte UTF-8 sequence, or a character that
    // can continue an ES identifier.
    else if matches!(tokenizer.current, Some(0x80..=0xBF))
        || id_cont_opt(char_after_index(
            tokenizer.parse_state.bytes,
            tokenizer.point.index,
        ))
    {
        tokenizer.consume();
        State::Next(StateName::MdxJsxAttributePrimaryName)
    } else {
        crash(
            tokenizer,
            "in attribute name",
            "an attribute name character such as letters, digits, `$`, or `_`; `=` to initialize a value; whitespace before attributes; or the end of the tag"
        )
    }
}
/// After the primary name of an attribute (and optional ES whitespace).
pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // `:` starts a local attribute name, as in `<a b:c>`.
        Some(b':') => {
            tokenizer.enter(Name::MdxJsxTagAttributeNamePrefixMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagAttributeNamePrefixMarker);
            tokenizer.attempt(
                State::Next(StateName::MdxJsxAttributeLocalNameBefore),
                State::Nok,
            );
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        // `=` starts an attribute value, as in `<a b="c">`.
        Some(b'=') => {
            tokenizer.exit(Name::MdxJsxTagAttributeName);
            tokenizer.enter(Name::MdxJsxTagAttributeInitializerMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagAttributeInitializerMarker);
            tokenizer.attempt(
                State::Next(StateName::MdxJsxAttributeValueBefore),
                State::Nok,
            );
            // Fix: this returned `State::Retry` after `consume()`, but
            // `Retry` re-dispatches on the current byte while `consume()` has
            // already advanced past it. Every other post-`consume` return in
            // this construct — including the `:` branch above and the `=`
            // branch of `attribute_local_name_after` — uses `Next`.
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        _ => {
            // A bare (boolean) attribute: the name ends at whitespace, tag
            // end (`/`, `>`), an attribute expression (`{`), or the start of
            // the next attribute name.
            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                == CharacterKind::Whitespace
                || matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
                || id_start_opt(char_after_index(
                    tokenizer.parse_state.bytes,
                    tokenizer.point.index,
                ))
            {
                tokenizer.exit(Name::MdxJsxTagAttributeName);
                tokenizer.exit(Name::MdxJsxTagAttribute);
                tokenizer.attempt(State::Next(StateName::MdxJsxAttributeBefore), State::Nok);
                State::Retry(StateName::MdxJsxEsWhitespaceStart)
            } else {
                crash(
                    tokenizer,
                    "after attribute name",
                    "a character that can start an attribute name, such as a letter, `$`, or `_`; `=` to initialize a value; or the end of the tag"
                )
            }
        }
    }
}
/// After an attribute prefix marker `:` (and optional ES whitespace), before
/// a local attribute name, as in `<a b:c>`.
pub fn attribute_local_name_before(tokenizer: &mut Tokenizer) -> State {
    let head = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index);
    // Anything that cannot start an ES identifier is a fatal parse error.
    if !id_start_opt(head) {
        return crash(
            tokenizer,
            "before local attribute name",
            "a character that can start an attribute name, such as a letter, `$`, or `_`; `=` to initialize a value; or the end of the tag"
        );
    }
    tokenizer.enter(Name::MdxJsxTagAttributeNameLocal);
    tokenizer.consume();
    State::Next(StateName::MdxJsxAttributeLocalName)
}
/// Inside a local attribute name, as in `<a b:c>`.
pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State {
    // End of the local attribute name: whitespace, an initializer (`=`), an
    // attribute expression (`{`), or tag end (`/`, `>`).
    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
        == CharacterKind::Whitespace
        || matches!(tokenizer.current, Some(b'/' | b'=' | b'>' | b'{'))
    {
        tokenizer.exit(Name::MdxJsxTagAttributeNameLocal);
        tokenizer.exit(Name::MdxJsxTagAttributeName);
        tokenizer.attempt(
            State::Next(StateName::MdxJsxAttributeLocalNameAfter),
            State::Nok,
        );
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
    // Continuation byte of a multi-byte UTF-8 sequence, or a character that
    // can continue an ES identifier.
    else if matches!(tokenizer.current, Some(0x80..=0xBF))
        || id_cont_opt(char_after_index(
            tokenizer.parse_state.bytes,
            tokenizer.point.index,
        ))
    {
        tokenizer.consume();
        State::Next(StateName::MdxJsxAttributeLocalName)
    } else {
        crash(
            tokenizer,
            "in local attribute name",
            "an attribute name character such as letters, digits, `$`, or `_`; `=` to initialize a value; whitespace before attributes; or the end of the tag"
        )
    }
}
/// After a local attribute name (and optional ES whitespace).
pub fn attribute_local_name_after(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // `=` starts an attribute value, as in `<a b:c="d">`.
        Some(b'=') => {
            tokenizer.enter(Name::MdxJsxTagAttributeInitializerMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagAttributeInitializerMarker);
            tokenizer.attempt(
                State::Next(StateName::MdxJsxAttributeValueBefore),
                State::Nok,
            );
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        _ => {
            // A bare attribute: ends at tag end (`/`, `>`), an attribute
            // expression (`{`), or the start of the next attribute name.
            if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
                || id_start_opt(char_after_index(
                    tokenizer.parse_state.bytes,
                    tokenizer.point.index,
                ))
            {
                tokenizer.exit(Name::MdxJsxTagAttribute);
                State::Retry(StateName::MdxJsxAttributeBefore)
            } else {
                crash(
                    tokenizer,
                    "after local attribute name",
                    "a character that can start an attribute name, such as a letter, `$`, or `_`; `=` to initialize a value; or the end of the tag"
                )
            }
        }
    }
}
/// After an initializer `=` (and optional ES whitespace), before an attribute
/// value: a quoted literal or an expression.
pub fn attribute_value_before(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Quoted value: `"…"` or `'…'`.
        Some(b'"' | b'\'') => {
            // Remember which quote opened the literal; checked (and reset)
            // in `attribute_value_quoted_start`.
            tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
            tokenizer.enter(Name::MdxJsxTagAttributeValueLiteral);
            tokenizer.enter(Name::MdxJsxTagAttributeValueLiteralMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagAttributeValueLiteralMarker);
            State::Next(StateName::MdxJsxAttributeValueQuotedStart)
        }
        // Expression value: `{…}`.
        Some(b'{') => {
            // Stash the tag token in `token_2` so the expression construct
            // can use `token_1`; restored in `attribute_value_expression_after`.
            tokenizer.tokenize_state.token_2 = tokenizer.tokenize_state.token_1.clone();
            tokenizer.tokenize_state.token_1 = Name::MdxJsxTagAttributeValueExpression;
            tokenizer.attempt(
                State::Next(StateName::MdxJsxAttributeValueExpressionAfter),
                State::Nok,
            );
            State::Retry(StateName::MdxExpressionStart)
        }
        _ => crash(
            tokenizer,
            "before attribute value",
            &format!(
                "a character that can start an attribute value, such as `\"`, `'`, or `{{`{}",
                // `<` suggests an element used directly as a value; hint at
                // the expression form.
                if tokenizer.current == Some(b'<') {
                    " (note: to use an element or fragment as a prop value in MDX, use `{<element />}`)"
                } else {
                    ""
                }
            ),
        ),
    }
}
/// After an attribute value expression (`{…}`).
///
/// Restores the tag token stashed in `token_2`, closes the attribute, and
/// continues to the next attribute after optional ES whitespace.
pub fn attribute_value_expression_after(tokenizer: &mut Tokenizer) -> State {
    // Move the stashed tag token back, leaving `Name::Data` as the
    // placeholder value in `token_2`.
    tokenizer.tokenize_state.token_1 =
        core::mem::replace(&mut tokenizer.tokenize_state.token_2, Name::Data);
    tokenizer.exit(Name::MdxJsxTagAttribute);
    tokenizer.attempt(State::Next(StateName::MdxJsxAttributeBefore), State::Nok);
    State::Retry(StateName::MdxJsxEsWhitespaceStart)
}
/// At the start of (or back at the start of a chunk of) a quoted attribute
/// value, after the opening quote or a line ending.
pub fn attribute_value_quoted_start(tokenizer: &mut Tokenizer) -> State {
    if let Some(byte) = tokenizer.current {
        // The matching closing quote: finish the literal and the attribute.
        if byte == tokenizer.tokenize_state.marker {
            // Reset the stashed quote marker.
            tokenizer.tokenize_state.marker = 0;
            tokenizer.enter(Name::MdxJsxTagAttributeValueLiteralMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagAttributeValueLiteralMarker);
            tokenizer.exit(Name::MdxJsxTagAttributeValueLiteral);
            tokenizer.exit(Name::MdxJsxTagAttribute);
            tokenizer.attempt(State::Next(StateName::MdxJsxAttributeBefore), State::Nok);
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        // Line endings inside the value are handled as ES whitespace, so
        // quoted values may span multiple lines.
        } else if byte == b'\n' {
            tokenizer.attempt(
                State::Next(StateName::MdxJsxAttributeValueQuotedStart),
                State::Nok,
            );
            State::Retry(StateName::MdxJsxEsWhitespaceStart)
        // Anything else starts a chunk of literal value text.
        } else {
            tokenizer.enter(Name::MdxJsxTagAttributeValueLiteralValue);
            State::Retry(StateName::MdxJsxAttributeValueQuoted)
        }
    } else {
        // EOF inside a quoted value: the literal was never closed.
        crash(
            tokenizer,
            "in attribute value",
            &format!(
                "a corresponding closing quote {}",
                format_byte(tokenizer.tokenize_state.marker)
            ),
        )
    }
}
/// Inside a chunk of a quoted attribute value.
///
/// The chunk ends at the closing quote, a line ending, or EOF; all three are
/// handed back to `attribute_value_quoted_start`.
pub fn attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // End of the chunk at EOF or a line ending.
        None | Some(b'\n') => {
            tokenizer.exit(Name::MdxJsxTagAttributeValueLiteralValue);
            State::Retry(StateName::MdxJsxAttributeValueQuotedStart)
        }
        // End of the chunk at the matching closing quote.
        Some(byte) if byte == tokenizer.tokenize_state.marker => {
            tokenizer.exit(Name::MdxJsxTagAttributeValueLiteralValue);
            State::Retry(StateName::MdxJsxAttributeValueQuotedStart)
        }
        // Any other byte is part of the value.
        Some(_) => {
            tokenizer.consume();
            State::Next(StateName::MdxJsxAttributeValueQuoted)
        }
    }
}
/// After the self-closing slash (and optional ES whitespace): only `>` may
/// follow.
pub fn self_closing(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'>') => State::Retry(StateName::MdxJsxTagEnd),
        _ => crash(
            tokenizer,
            "after self-closing slash",
            &format!(
                "`>` to end the tag{}",
                // Fix: this previously compared with `Some(b'*' | b'/')`,
                // which bitwise-ORs the bytes (`b'*' | b'/' == b'/'`), so the
                // JS-comment hint never showed for `*`. A pattern match tests
                // both bytes as intended.
                if matches!(tokenizer.current, Some(b'*' | b'/')) {
                    " (note: JS comments in JSX tags are not supported in MDX)"
                } else {
                    ""
                }
            ),
        ),
    }
}
/// At the final `>` of a tag: consume it and close the tag.
///
/// All states that route here have already verified the byte, hence the
/// `unreachable!` on anything else.
pub fn tag_end(tokenizer: &mut Tokenizer) -> State {
    if tokenizer.current == Some(b'>') {
        tokenizer.enter(Name::MdxJsxTagMarker);
        tokenizer.consume();
        tokenizer.exit(Name::MdxJsxTagMarker);
        tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
        State::Ok
    } else {
        unreachable!("expected `>`")
    }
}
/// Start of optional ES whitespace inside a tag.
///
/// Succeeds (`State::Ok`) immediately when there is no whitespace to eat.
pub fn es_whitespace_start(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Line endings are tokenized separately so lazy-line checks can run
        // afterwards (`es_whitespace_eol_after`).
        Some(b'\n') => {
            tokenizer.enter(Name::LineEnding);
            tokenizer.consume();
            tokenizer.exit(Name::LineEnding);
            State::Next(StateName::MdxJsxEsWhitespaceEolAfter)
        }
        _ => {
            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                == CharacterKind::Whitespace
            {
                tokenizer.enter(Name::MdxJsxEsWhitespace);
                State::Retry(StateName::MdxJsxEsWhitespaceInside)
            } else {
                // No whitespace here; nothing to do.
                State::Ok
            }
        }
    }
}
/// Inside a run of ES whitespace in a tag.
pub fn es_whitespace_inside(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // A line ending closes this whitespace token; start over so the
        // line ending gets its own token.
        Some(b'\n') => {
            tokenizer.exit(Name::MdxJsxEsWhitespace);
            State::Retry(StateName::MdxJsxEsWhitespaceStart)
        }
        // Continuation byte of a multi-byte UTF-8 whitespace character.
        Some(0x80..=0xBF) => {
            tokenizer.consume();
            State::Next(StateName::MdxJsxEsWhitespaceInside)
        }
        // More whitespace.
        Some(_)
            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                == CharacterKind::Whitespace =>
        {
            tokenizer.consume();
            State::Next(StateName::MdxJsxEsWhitespaceInside)
        }
        // Non-whitespace: done.
        Some(_) => {
            tokenizer.exit(Name::MdxJsxEsWhitespace);
            State::Ok
        }
        // EOF inside a tag is not allowed.
        None => State::Nok,
    }
}
/// After a line ending inside a tag.
///
/// For flow tags, a lazy continuation line (one not properly prefixed by its
/// containers, e.g. missing `>` in a block quote) is a hard error; otherwise
/// whitespace parsing simply continues on the new line.
pub fn es_whitespace_eol_after(tokenizer: &mut Tokenizer) -> State {
    if tokenizer.tokenize_state.token_1 == Name::MdxJsxFlowTag && tokenizer.lazy {
        State::Error(
            message::Message {
                place: Some(Box::new(message::Place::Point(tokenizer.point.to_unist()))),
                reason: "Unexpected lazy line in jsx in container, expected line to be prefixed with `>` when in a block quote, whitespace when in a list, etc".into(),
                rule_id: Box::new("unexpected-lazy".into()),
                source: Box::new("markdown-rs".into()),
            }
        )
    } else {
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
}
fn id_start_opt(code: Option<char>) -> bool {
code.map_or(false, id_start)
}
/// Whether an optional character can continue an ES identifier (`None` → no).
fn id_cont_opt(code: Option<char>) -> bool {
    match code {
        Some(c) => id_cont(c, true),
        None => false,
    }
}
/// Build a fatal parse error at the current point.
///
/// `at` describes where parsing was (e.g. "before name"); `expect` describes
/// what would have been valid. The message reads
/// `Unexpected <char-or-eof> <at>, expected <expect>`.
fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> State {
    State::Error(message::Message {
        place: Some(Box::new(message::Place::Point(tokenizer.point.to_unist()))),
        reason: format!(
            "Unexpected {} {}, expected {}",
            // Format the offending character, or EOF when there is none.
            format_char_opt(if tokenizer.current.is_none() {
                None
            } else {
                char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
            }),
            at,
            expect
        ),
        // Rule id distinguishes EOF errors from unexpected characters.
        rule_id: Box::new(format!(
            "unexpected-{}",
            if tokenizer.current.is_none() {
                "eof"
            } else {
                "character"
            }
        )),
        source: Box::new("markdown-rs".into()),
    })
}