use super::{is_two_arg_rd_macro, scan_balanced, utf8_len};
use crate::parser::lexer::{TokKind, Token};
/// Roxygen tag names whose first word after the tag is treated as an
/// argument rather than as prose.
const ARG_BEARING_TAGS: &[&str] = &[
    "param",
    "field",
    "slot",
    "inheritParams",
    "inheritSection",
    "template",
    "templateVar",
    "method",
];

/// Returns `true` when `name` (the tag name without the leading `@`) is one
/// of the entries in [`ARG_BEARING_TAGS`]. The comparison is case-sensitive.
fn is_arg_bearing_tag(name: &str) -> bool {
    ARG_BEARING_TAGS.iter().any(|&tag| tag == name)
}
/// Reports whether `text` begins with a roxygen marker: one or more `#`
/// characters immediately followed by a single quote (`#'`, `##'`, ...).
pub(crate) fn is_roxygen_comment(text: &str) -> bool {
    // The explicit `#` check rejects text that merely starts with a quote.
    text.starts_with('#') && text.trim_start_matches('#').starts_with('\'')
}
/// Walks the run of consecutive roxygen lines that begins at byte offset
/// `start` of `input`.
///
/// Returns `(md, end)`:
/// * `md` is the value of the last `@md` / `@noMd` directive line found in
///   the run, or `false` when the run contains none;
/// * `end` is the byte offset where the last line of the run stops (the
///   position of its `\n`, or `input.len()` at end of input).
///
/// A following line continues the run when, after optional leading spaces
/// and tabs, it starts with `#` and satisfies [`is_roxygen_comment`].
pub(crate) fn resolve_roxygen_block(input: &str, start: usize) -> (bool, usize) {
    let bytes = input.as_bytes();
    let mut markdown = false;
    let mut line_start = start;
    loop {
        let line_end = line_run_end(bytes, line_start);

        // Ignore a trailing `\r` so CRLF input matches directives too.
        let raw_line = &input[line_start..line_end];
        let line = raw_line.strip_suffix('\r').unwrap_or(raw_line);
        if let Some(enabled) = roxygen_md_directive(line) {
            markdown = enabled;
        }

        if line_end >= bytes.len() {
            return (markdown, line_end);
        }

        // Step over the newline and any indentation of the next line.
        let after_newline = line_end + 1;
        let indent = bytes[after_newline..]
            .iter()
            .take_while(|&&b| matches!(b, b' ' | b'\t'))
            .count();
        let candidate = after_newline + indent;

        let continues = bytes.get(candidate) == Some(&b'#')
            && is_roxygen_comment(&input[candidate..line_run_end(bytes, candidate)]);
        if !continues {
            return (markdown, line_end);
        }
        line_start = candidate;
    }
}
/// Returns the index of the first `\n` at or after `i`, or `bytes.len()` when
/// the rest of the buffer contains no newline. An `i` past the end of the
/// buffer is returned unchanged.
fn line_run_end(bytes: &[u8], i: usize) -> usize {
    match bytes.get(i..) {
        Some(tail) => tail
            .iter()
            .position(|&b| b == b'\n')
            .map_or(bytes.len(), |offset| i + offset),
        None => i,
    }
}
/// Recognizes a line whose whole body is a markdown toggle.
///
/// Leading `#` characters and the following quote are stripped and the
/// remainder is trimmed. Returns `Some(true)` for `@md`, `Some(false)` for
/// `@noMd`, and `None` for anything else (including a line with no quote
/// after the hashes).
fn roxygen_md_directive(line: &str) -> Option<bool> {
    let body = line.trim_start_matches('#').strip_prefix('\'')?.trim();
    if body == "@md" {
        Some(true)
    } else if body == "@noMd" {
        Some(false)
    } else {
        None
    }
}
/// Lexes one roxygen comment line into `out`.
///
/// * `text` is the line, which must satisfy [`is_roxygen_comment`]
///   (checked with a `debug_assert!`).
/// * `start` is the offset of `text` in the enclosing input; it is added to
///   every token span.
/// * `md` selects markdown-aware prose lexing.
///
/// Emits the marker (all leading `#` plus the quote), any whitespace after
/// it, and then either a tag (when the body starts with `@` followed by an
/// ASCII letter) or plain prose.
pub(crate) fn lex_roxygen_line(out: &mut Vec<Token>, text: &str, start: usize, md: bool) {
    debug_assert!(is_roxygen_comment(text));

    let hashes = text.bytes().take_while(|&b| b == b'#').count();
    let marker_len = hashes + 1;
    push(out, TokKind::RoxygenMarker, text, start, 0, marker_len);

    let body = take_ws(out, text, start, marker_len);
    let bytes = text.as_bytes();
    let Some(&first) = bytes.get(body) else {
        // Nothing but the marker (and maybe trailing blanks) on this line.
        return;
    };

    let starts_tag = first == b'@' && bytes.get(body + 1).is_some_and(u8::is_ascii_alphabetic);
    if starts_tag {
        lex_roxygen_tag(out, text, start, body, md);
    } else {
        lex_roxygen_prose(out, text, start, body, md, true);
    }
}
/// Lexes a tag line starting at the `@` found at offset `pos` of `text`.
///
/// Emits, in order: the `@`, the ASCII-alphanumeric tag name, any
/// whitespace, then — only for tags accepted by [`is_arg_bearing_tag`] —
/// one argument token running up to the next space or tab plus the
/// whitespace after it. Whatever remains is handed to
/// [`lex_roxygen_prose`] with `line_start = false`.
///
/// `start` is the offset of `text` in the enclosing input and `md` selects
/// markdown-aware prose lexing.
fn lex_roxygen_tag(out: &mut Vec<Token>, text: &str, start: usize, mut pos: usize, md: bool) {
    let bytes = text.as_bytes();
    push(out, TokKind::RoxygenAt, text, start, pos, 1);
    pos += 1;

    let name_start = pos;
    while pos < bytes.len() && bytes[pos].is_ascii_alphanumeric() {
        pos += 1;
    }
    // Borrow the name from `text`; it is only needed for the lookup below,
    // so there is no reason to allocate a `String` for it.
    let name = &text[name_start..pos];
    push(
        out,
        TokKind::RoxygenTagName,
        text,
        start,
        name_start,
        pos - name_start,
    );

    pos = take_ws(out, text, start, pos);
    if pos >= text.len() {
        return;
    }

    if is_arg_bearing_tag(name) {
        let arg_start = pos;
        // The argument stops only at an ASCII blank, so `pos` always lands on
        // a character boundary even when the argument contains UTF-8 text.
        while pos < bytes.len() && !matches!(bytes[pos], b' ' | b'\t') {
            pos += 1;
        }
        push(
            out,
            TokKind::RoxygenTagArg,
            text,
            start,
            arg_start,
            pos - arg_start,
        );
        pos = take_ws(out, text, start, pos);
    }

    lex_roxygen_prose(out, text, start, pos, md, false);
}
/// Lexes the prose part of a roxygen line, from offset `pos` of `text` to
/// its end, into `out`.
///
/// * `start` is the offset of `text` in the enclosing input.
/// * `md` enables the markdown recognizers (emphasis, images, links,
///   autolinks, inline HTML, markdown-flavoured code spans).
/// * `line_start` says that `pos` is the first prose position of the line;
///   together with `md` it enables fence and list-marker recognition.
///
/// Recognized spans become their own tokens; everything between them is
/// emitted as `RoxygenText`.
fn lex_roxygen_prose(
    out: &mut Vec<Token>,
    text: &str,
    start: usize,
    pos: usize,
    md: bool,
    line_start: bool,
) {
    let bytes = text.as_bytes();
    let mut cursor = pos;

    if md && line_start {
        // A fence consumes the whole rest of the line.
        if let Some(fence_end) = scan_md_fence(bytes, pos) {
            push(
                out,
                TokKind::RoxygenMdFence,
                text,
                start,
                pos,
                fence_end - pos,
            );
            return;
        }
        if let Some(marker_end) = scan_md_list_marker(bytes, pos) {
            push(
                out,
                TokKind::RoxygenMdListMarker,
                text,
                start,
                pos,
                marker_end - pos,
            );
            cursor = marker_end;
        }
    }

    // Start of the pending plain-text run that has not been emitted yet.
    let mut text_start = cursor;
    while cursor < bytes.len() {
        let byte = bytes[cursor];
        let recognized = if byte == b'\\' {
            scan_rd_macro(bytes, cursor).map(|end| (TokKind::RoxygenRdMacro, end))
        } else if byte == b'`' {
            let kind = if md {
                TokKind::RoxygenMdCode
            } else {
                TokKind::RoxygenCode
            };
            scan_inline_code(bytes, cursor).map(|end| (kind, end))
        } else if !md {
            None
        } else {
            match byte {
                b'*' | b'_' => scan_md_emphasis(bytes, cursor),
                b'!' => scan_md_image(bytes, cursor).map(|end| (TokKind::RoxygenMdImage, end)),
                b'[' => scan_md_link(bytes, cursor).map(|end| (TokKind::RoxygenMdLink, end)),
                // Autolinks take priority over inline HTML.
                b'<' => scan_md_autolink(bytes, cursor)
                    .map(|end| (TokKind::RoxygenMdLink, end))
                    .or_else(|| {
                        scan_md_html_inline(bytes, cursor).map(|end| (TokKind::RoxygenMdHtml, end))
                    }),
                _ => None,
            }
        };

        match recognized {
            Some((kind, end)) => {
                // Flush the text gathered so far, then the span itself.
                push(
                    out,
                    TokKind::RoxygenText,
                    text,
                    start,
                    text_start,
                    cursor - text_start,
                );
                push(out, kind, text, start, cursor, end - cursor);
                cursor = end;
                text_start = end;
            }
            None => cursor += utf8_len(byte),
        }
    }

    push(
        out,
        TokKind::RoxygenText,
        text,
        start,
        text_start,
        bytes.len() - text_start,
    );
}
/// Counts how many consecutive bytes equal to `c` start at index `i`.
/// Returns 0 when `i` is at or past the end of `bytes`.
fn run_len(bytes: &[u8], i: usize, c: u8) -> usize {
    bytes
        .get(i..)
        .map_or(0, |tail| tail.iter().take_while(|&&b| b == c).count())
}
/// Scans a backtick-delimited code span whose opening run starts at `i`.
///
/// The span closes at the first later backtick run of exactly the same
/// length as the opening run. Returns the index just past that closing run,
/// or `None` when no such run exists.
fn scan_inline_code(bytes: &[u8], i: usize) -> Option<usize> {
    let open_len = run_len(bytes, i, b'`');
    let mut cursor = i + open_len;
    while let Some(&byte) = bytes.get(cursor) {
        if byte != b'`' {
            cursor += 1;
            continue;
        }
        let close_len = run_len(bytes, cursor, b'`');
        if close_len == open_len {
            return Some(cursor + close_len);
        }
        // A run of the wrong length is part of the content; skip it whole.
        cursor += close_len;
    }
    None
}
/// Scans a markdown emphasis span opened by the `*` or `_` at index `i`.
///
/// Rules applied, in order:
/// * an opening run of three or more delimiters is rejected;
/// * the byte after the opening run must exist and not be ASCII whitespace;
/// * an `_` opener directly after an ASCII alphanumeric byte is rejected;
/// * the closer is the first delimiter run at least as long as the opener
///   whose preceding byte is not ASCII whitespace and — for `_` — whose
///   following byte is absent or not ASCII alphanumeric.
///
/// Returns the token kind (`RoxygenMdStrong` for a two-byte delimiter,
/// `RoxygenMdEmph` for one) and the index just past the closer.
fn scan_md_emphasis(bytes: &[u8], i: usize) -> Option<(TokKind, usize)> {
    let delim = bytes[i];
    let width = run_len(bytes, i, delim);
    if width >= 3 {
        return None;
    }

    let content_start = i + width;
    match bytes.get(content_start) {
        Some(b) if !b.is_ascii_whitespace() => {}
        _ => return None,
    }

    let is_underscore = delim == b'_';
    if is_underscore && i > 0 && bytes[i - 1].is_ascii_alphanumeric() {
        return None;
    }

    let mut j = content_start;
    while j < bytes.len() {
        if bytes[j] != delim {
            j += utf8_len(bytes[j]);
            continue;
        }

        let run = run_len(bytes, j, delim);
        let close_end = j + width;
        let long_enough = run >= width;
        let follows_content = j > content_start && !bytes[j - 1].is_ascii_whitespace();
        let right_side_ok = !is_underscore
            || !bytes
                .get(close_end)
                .is_some_and(|b| b.is_ascii_alphanumeric());

        if long_enough && follows_content && right_side_ok {
            let kind = match width {
                2 => TokKind::RoxygenMdStrong,
                _ => TokKind::RoxygenMdEmph,
            };
            return Some((kind, close_end));
        }
        j += run;
    }
    None
}
fn scan_md_fence(bytes: &[u8], i: usize) -> Option<usize> {
if run_len(bytes, i, b'`') < 3 {
return None;
}
let info = i + run_len(bytes, i, b'`');
if bytes[info..].contains(&b'`') {
return None;
}
Some(bytes.len())
}
/// Recognizes a markdown list marker starting at index `i`.
///
/// Accepted markers are a single `-`, `*` or `+`, or one to nine ASCII
/// digits followed by `.` or `)`. The marker must be followed by a space, a
/// tab, or the end of the buffer. Returns the index just past the marker.
fn scan_md_list_marker(bytes: &[u8], i: usize) -> Option<usize> {
    let first = *bytes.get(i)?;
    let marker_end = if matches!(first, b'-' | b'*' | b'+') {
        i + 1
    } else if first.is_ascii_digit() {
        let digits = bytes[i..]
            .iter()
            .take_while(|b| b.is_ascii_digit())
            .count();
        if digits > 9 {
            return None;
        }
        let delimiter = i + digits;
        if !matches!(bytes.get(delimiter), Some(b'.' | b')')) {
            return None;
        }
        delimiter + 1
    } else {
        return None;
    };

    matches!(bytes.get(marker_end), None | Some(b' ' | b'\t')).then_some(marker_end)
}
/// Scans a markdown image `![alt](target)` whose `!` is at index `i`.
///
/// Requires `[` right after the `!`, a balanced `[...]`, and a balanced
/// `(...)` immediately after it. Returns whatever `scan_balanced` reports
/// for the parenthesized part (presumably the index just past the `)` —
/// confirm against `scan_balanced`), or `None` when the shape does not match.
fn scan_md_image(bytes: &[u8], i: usize) -> Option<usize> {
    let alt_open = i + 1;
    if bytes.get(alt_open) != Some(&b'[') {
        return None;
    }
    let target_open = scan_balanced(bytes, alt_open, b'[', b']')?;
    if bytes.get(target_open) == Some(&b'(') {
        scan_balanced(bytes, target_open, b'(', b')')
    } else {
        None
    }
}
/// Scans a markdown link whose `[` is at index `i`.
///
/// After the balanced `[text]`:
/// * `(`  — inline link, must be a balanced `(...)`;
/// * `[`  — reference link, must be a balanced `[...]`;
/// * `{`  — never a link (`[x]{y}` stays literal);
/// * anything else — shortcut link, accepted only when the bracket content
///   passes [`is_shortcut_content`].
///
/// NOTE(review): slicing the label assumes `scan_balanced` returns the index
/// just past the closing bracket — confirm against its definition.
fn scan_md_link(bytes: &[u8], i: usize) -> Option<usize> {
    let after_text = scan_balanced(bytes, i, b'[', b']')?;
    let next = bytes.get(after_text).copied();
    if next == Some(b'(') {
        return scan_balanced(bytes, after_text, b'(', b')');
    }
    if next == Some(b'[') {
        return scan_balanced(bytes, after_text, b'[', b']');
    }
    if next == Some(b'{') {
        return None;
    }
    let label = &bytes[i + 1..after_text - 1];
    is_shortcut_content(label).then_some(after_text)
}
/// A shortcut-link label is valid when it is non-empty and contains neither
/// `[` nor `]`.
fn is_shortcut_content(content: &[u8]) -> bool {
    !content.is_empty() && content.iter().all(|&b| b != b'[' && b != b']')
}
/// Scans a markdown URI autolink `<scheme:rest>` whose `<` is at index `i`.
///
/// The scheme must start with an ASCII letter, continue with ASCII
/// letters, digits, `+`, `.` or `-`, be 2 to 32 bytes long, and be followed
/// by `:`. The rest runs to the first `>`; a space, `<`, or ASCII control
/// byte before that rejects the link. Returns the index just past the `>`.
fn scan_md_autolink(bytes: &[u8], i: usize) -> Option<usize> {
    let scheme_start = i + 1;
    if !bytes.get(scheme_start)?.is_ascii_alphabetic() {
        return None;
    }

    let tail_len = bytes[scheme_start + 1..]
        .iter()
        .take_while(|&&b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'.' | b'-'))
        .count();
    let scheme_len = 1 + tail_len;
    let colon = scheme_start + scheme_len;
    if !(2..=32).contains(&scheme_len) || bytes.get(colon) != Some(&b':') {
        return None;
    }

    let rest_start = colon + 1;
    for (offset, &b) in bytes[rest_start..].iter().enumerate() {
        match b {
            b'>' => return Some(rest_start + offset + 1),
            b' ' | b'<' => return None,
            _ if b.is_ascii_control() => return None,
            _ => {}
        }
    }
    None
}
/// Scans an inline HTML tag whose `<` is at index `i`.
///
/// The tag name must start with an ASCII letter and continue with ASCII
/// alphanumerics or `-`.
/// * Closing tags (`</name`) allow only blanks before the final `>`.
/// * Opening tags allow blank-separated attributes (see
///   [`scan_html_attribute`]), then optional blanks and an optional `/`
///   before the final `>`.
///
/// Returns the index just past the `>`, or `None` for anything malformed.
fn scan_md_html_inline(bytes: &[u8], i: usize) -> Option<usize> {
    let mut j = i + 1;
    let is_closing = bytes.get(j) == Some(&b'/');
    if is_closing {
        j += 1;
    }

    if !bytes.get(j).is_some_and(u8::is_ascii_alphabetic) {
        return None;
    }
    j += 1;
    j += bytes[j..]
        .iter()
        .take_while(|&&b| b.is_ascii_alphanumeric() || b == b'-')
        .count();

    if is_closing {
        j = skip_html_ws(bytes, j);
    } else {
        loop {
            let attr_start = skip_html_ws(bytes, j);
            if attr_start == j {
                // Attributes must be separated from what precedes by blanks.
                break;
            }
            let Some(attr_end) = scan_html_attribute(bytes, attr_start) else {
                // Not an attribute: keep the blanks consumed and let the
                // closing check below decide.
                j = attr_start;
                break;
            };
            j = attr_end;
        }
        j = skip_html_ws(bytes, j);
        if bytes.get(j) == Some(&b'/') {
            j += 1;
        }
    }

    (bytes.get(j) == Some(&b'>')).then_some(j + 1)
}
/// Returns the index of the first byte at or after `i` that is neither a
/// space nor a tab. An `i` at or past the end is returned unchanged.
fn skip_html_ws(bytes: &[u8], i: usize) -> usize {
    let blanks = bytes.get(i..).map_or(0, |tail| {
        tail.iter()
            .take_while(|&&b| b == b' ' || b == b'\t')
            .count()
    });
    i + blanks
}
/// Scans one HTML attribute starting at index `i`.
///
/// The name starts with an ASCII letter, `_` or `:` and continues with
/// ASCII alphanumerics, `_`, `.`, `:` or `-`. Without a following `=`
/// (blanks allowed before it) the attribute ends right after the name.
/// With `=`, the value (blanks allowed before it) is either
/// * quoted with `'` or `"` and must be closed by the same quote, or
/// * unquoted: a non-empty run free of blanks, quotes, `=`, `<`, `>` and
///   backticks.
///
/// Returns the index just past the attribute, or `None` when malformed.
fn scan_html_attribute(bytes: &[u8], i: usize) -> Option<usize> {
    let first = *bytes.get(i)?;
    if !(first.is_ascii_alphabetic() || first == b'_' || first == b':') {
        return None;
    }

    let mut name_end = i + 1;
    while let Some(&b) = bytes.get(name_end) {
        if b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.' | b':' | b'-') {
            name_end += 1;
        } else {
            break;
        }
    }

    let eq = skip_html_ws(bytes, name_end);
    if bytes.get(eq) != Some(&b'=') {
        // Valueless attribute: blanks after the name are not consumed.
        return Some(name_end);
    }

    let value_start = skip_html_ws(bytes, eq + 1);
    match bytes.get(value_start).copied() {
        Some(quote) if quote == b'\'' || quote == b'"' => {
            let body_start = value_start + 1;
            bytes[body_start..]
                .iter()
                .position(|&b| b == quote)
                .map(|offset| body_start + offset + 1)
        }
        _ => {
            let value_len = bytes[value_start..]
                .iter()
                .take_while(|&&b| {
                    !matches!(b, b' ' | b'\t' | b'"' | b'\'' | b'=' | b'<' | b'>' | b'`')
                })
                .count();
            (value_len > 0).then_some(value_start + value_len)
        }
    }
}
/// Scans an Rd macro whose backslash is at index `i`.
///
/// * No macro name after the backslash: `None`.
/// * Name not followed by `{` or `[`: accepted (ending right after the
///   name) only when `is_known_rd_macro` does not recognize it.
/// * Otherwise an optional balanced `[...]` must be followed by a balanced
///   `{...}`. For names accepted by `is_two_arg_rd_macro`, an immediately
///   following second balanced `{...}` is included as well.
///
/// Returns the index where the macro ends.
pub(crate) fn scan_rd_macro(bytes: &[u8], i: usize) -> Option<usize> {
    let name_start = i + 1;
    let name_end = super::rd_macro_name_end(bytes, name_start);
    if name_end == name_start {
        return None;
    }
    let name = std::str::from_utf8(&bytes[name_start..name_end]).unwrap_or_default();

    let brace_at = match bytes.get(name_end) {
        Some(b'{') => name_end,
        Some(b'[') => scan_balanced(bytes, name_end, b'[', b']')?,
        _ => return (!super::is_known_rd_macro(name)).then_some(name_end),
    };
    if bytes.get(brace_at) != Some(&b'{') {
        return None;
    }

    let first_end = scan_balanced(bytes, brace_at, b'{', b'}')?;
    if is_two_arg_rd_macro(name) && bytes.get(first_end) == Some(&b'{') {
        // An unbalanced second argument is simply left out of the span.
        if let Some(second_end) = scan_balanced(bytes, first_end, b'{', b'}') {
            return Some(second_end);
        }
    }
    Some(first_end)
}
/// Appends a token of `kind` covering `text[off..off + len]` to `out`.
///
/// `start` is the offset of `text` in the enclosing input, so the token's
/// span is `start + off .. start + off + len`. Zero-length tokens are
/// silently dropped.
fn push(out: &mut Vec<Token>, kind: TokKind, text: &str, start: usize, off: usize, len: usize) {
    if len == 0 {
        return;
    }
    let local_end = off + len;
    let token = Token {
        kind,
        text: text[off..local_end].to_owned(),
        start: start + off,
        end: start + local_end,
    };
    out.push(token);
}
/// Emits the run of spaces and tabs starting at offset `pos` of `text` as a
/// single `Whitespace` token (nothing when the run is empty) and returns the
/// offset just past it.
fn take_ws(out: &mut Vec<Token>, text: &str, start: usize, pos: usize) -> usize {
    let run = text.as_bytes().get(pos..).map_or(0, |tail| {
        tail.iter()
            .take_while(|&&b| matches!(b, b' ' | b'\t'))
            .count()
    });
    push(out, TokKind::Whitespace, text, start, pos, run);
    pos + run
}
#[cfg(test)]
mod tests {
    //! Tests for the roxygen lexer. Most go through the crate-level `lex`
    //! entry point and compare token kinds and/or token texts.

    use super::*;
    use crate::parser::lexer::lex;

    /// Token kinds produced by lexing `input`, in order.
    fn kinds(input: &str) -> Vec<TokKind> {
        lex(input).into_iter().map(|t| t.kind).collect()
    }

    /// Asserts that concatenating all token texts reproduces `input`.
    fn assert_lossless(input: &str) {
        let joined: String = lex(input).into_iter().map(|t| t.text).collect();
        assert_eq!(joined, input, "lexing was not lossless for {input:?}");
    }

    #[test]
    fn recognizes_roxygen_prefix() {
        assert!(is_roxygen_comment("#'"));
        assert!(is_roxygen_comment("#' x"));
        assert!(is_roxygen_comment("#'x"));
        assert!(is_roxygen_comment("##' x"));
        assert!(!is_roxygen_comment("# 'x"));
        assert!(!is_roxygen_comment("# x"));
        assert!(!is_roxygen_comment("#!/usr/bin/env Rscript"));
        assert!(!is_roxygen_comment("###"));
        assert!(!is_roxygen_comment(""));
    }

    #[test]
    fn plain_comment_stays_one_token() {
        assert_eq!(kinds("# x\n"), vec![TokKind::Comment, TokKind::Newline]);
        assert_eq!(kinds("# 'x\n"), vec![TokKind::Comment, TokKind::Newline]);
    }

    #[test]
    fn simple_roxygen_line() {
        assert_eq!(
            kinds("#' Title\n"),
            vec![
                TokKind::RoxygenMarker,
                TokKind::Whitespace,
                TokKind::RoxygenText,
                TokKind::Newline,
            ]
        );
        assert_lossless("#' Title\n");
    }

    #[test]
    fn no_space_after_marker() {
        assert_eq!(
            kinds("#'x\n"),
            vec![
                TokKind::RoxygenMarker,
                TokKind::RoxygenText,
                TokKind::Newline
            ]
        );
        assert_lossless("#'x\n");
    }

    #[test]
    fn blank_roxygen_line() {
        assert_eq!(
            kinds("#'\n"),
            vec![TokKind::RoxygenMarker, TokKind::Newline]
        );
        assert_lossless("#'\n");
    }

    #[test]
    fn multi_hash_marker() {
        let toks = lex("##' x\n");
        assert_eq!(toks[0].kind, TokKind::RoxygenMarker);
        assert_eq!(toks[0].text, "##'");
        assert_lossless("##' x\n");
    }

    #[test]
    fn arg_bearing_tag() {
        assert_eq!(
            kinds("#' @param x A number.\n"),
            vec![
                TokKind::RoxygenMarker,
                TokKind::Whitespace,
                TokKind::RoxygenAt,
                TokKind::RoxygenTagName,
                TokKind::Whitespace,
                TokKind::RoxygenTagArg,
                TokKind::Whitespace,
                TokKind::RoxygenText,
                TokKind::Newline,
            ]
        );
        assert_lossless("#' @param x A number.\n");
    }

    #[test]
    fn non_arg_tag_has_no_arg_token() {
        assert_eq!(
            kinds("#' @return value\n"),
            vec![
                TokKind::RoxygenMarker,
                TokKind::Whitespace,
                TokKind::RoxygenAt,
                TokKind::RoxygenTagName,
                TokKind::Whitespace,
                TokKind::RoxygenText,
                TokKind::Newline,
            ]
        );
    }

    #[test]
    fn bare_tag_no_content() {
        assert_eq!(
            kinds("#' @examples\n"),
            vec![
                TokKind::RoxygenMarker,
                TokKind::Whitespace,
                TokKind::RoxygenAt,
                TokKind::RoxygenTagName,
                TokKind::Newline,
            ]
        );
    }

    #[test]
    fn at_escape_and_midline_at_are_text() {
        assert_eq!(
            kinds("#' @@esc\n"),
            vec![
                TokKind::RoxygenMarker,
                TokKind::Whitespace,
                TokKind::RoxygenText,
                TokKind::Newline,
            ]
        );
        assert_eq!(
            kinds("#' a @ b\n"),
            vec![
                TokKind::RoxygenMarker,
                TokKind::Whitespace,
                TokKind::RoxygenText,
                TokKind::Newline,
            ]
        );
    }

    #[test]
    fn crlf_keeps_newline_token_clean() {
        let toks = lex("#' Title\r\n");
        assert_eq!(
            toks.iter().map(|t| t.kind.clone()).collect::<Vec<_>>(),
            vec![
                TokKind::RoxygenMarker,
                TokKind::Whitespace,
                TokKind::RoxygenText,
                TokKind::Newline,
            ]
        );
        assert_eq!(toks.last().unwrap().text, "\r\n");
        assert_eq!(toks[2].text, "Title");
        assert_lossless("#' Title\r\n");
    }

    #[test]
    fn roxygen_at_eof_without_newline() {
        assert_eq!(
            kinds("#' Title"),
            vec![
                TokKind::RoxygenMarker,
                TokKind::Whitespace,
                TokKind::RoxygenText
            ]
        );
        assert_lossless("#' Title");
    }

    /// Kind and text of every prose-level token in `input`; markers, tag
    /// tokens, whitespace and newlines are filtered out.
    fn prose_texts(input: &str) -> Vec<(TokKind, String)> {
        lex(input)
            .into_iter()
            .filter(|t| {
                matches!(
                    t.kind,
                    TokKind::RoxygenText
                        | TokKind::RoxygenCode
                        | TokKind::RoxygenRdMacro
                        | TokKind::RoxygenMdLink
                        | TokKind::RoxygenMdImage
                        | TokKind::RoxygenMdEmph
                        | TokKind::RoxygenMdStrong
                        | TokKind::RoxygenMdCode
                        | TokKind::RoxygenMdListMarker
                        | TokKind::RoxygenMdFence
                        | TokKind::RoxygenMdHtml
                )
            })
            .map(|t| (t.kind, t.text))
            .collect()
    }

    #[test]
    fn inline_code_span() {
        assert_eq!(
            prose_texts("#' Use `x + y` now\n"),
            vec![
                (TokKind::RoxygenText, "Use ".into()),
                (TokKind::RoxygenCode, "`x + y`".into()),
                (TokKind::RoxygenText, " now".into()),
            ]
        );
        assert_lossless("#' Use `x + y` now\n");
    }

    #[test]
    fn md_inline_recognized_under_md_mode() {
        let src = "#' a *one*, **two**, and `three` end.\n#' @md\n";
        assert_eq!(
            prose_texts(src),
            vec![
                (TokKind::RoxygenText, "a ".into()),
                (TokKind::RoxygenMdEmph, "*one*".into()),
                (TokKind::RoxygenText, ", ".into()),
                (TokKind::RoxygenMdStrong, "**two**".into()),
                (TokKind::RoxygenText, ", and ".into()),
                (TokKind::RoxygenMdCode, "`three`".into()),
                (TokKind::RoxygenText, " end.".into()),
            ]
        );
        assert_lossless(src);
    }

    #[test]
    fn md_list_marker_recognized_under_md_mode() {
        let bullet = "#' - first step\n#' @md\n";
        assert_eq!(
            prose_texts(bullet),
            vec![
                (TokKind::RoxygenMdListMarker, "-".into()),
                (TokKind::RoxygenText, " first step".into()),
            ]
        );
        assert_lossless(bullet);
        let ordered = "#' 1. one\n#' @md\n";
        assert_eq!(
            prose_texts(ordered),
            vec![
                (TokKind::RoxygenMdListMarker, "1.".into()),
                (TokKind::RoxygenText, " one".into()),
            ]
        );
        assert_lossless(ordered);
    }

    #[test]
    fn md_list_marker_off_without_md_directive() {
        assert_eq!(
            prose_texts("#' - first step\n"),
            vec![(TokKind::RoxygenText, "- first step".into())]
        );
    }

    #[test]
    fn md_list_marker_requires_space_and_is_not_emphasis() {
        let src = "#' * a *b* c\n#' @md\n";
        assert_eq!(
            prose_texts(src),
            vec![
                (TokKind::RoxygenMdListMarker, "*".into()),
                (TokKind::RoxygenText, " a ".into()),
                (TokKind::RoxygenMdEmph, "*b*".into()),
                (TokKind::RoxygenText, " c".into()),
            ]
        );
        assert_lossless(src);
        assert_eq!(
            prose_texts("#' -3 degrees\n#' @md\n"),
            vec![(TokKind::RoxygenText, "-3 degrees".into())]
        );
    }

    #[test]
    fn md_fence_recognized_under_md_mode() {
        let opener = "#' ```r\n#' @md\n";
        assert_eq!(
            prose_texts(opener),
            vec![(TokKind::RoxygenMdFence, "```r".into())]
        );
        assert_lossless(opener);
        let closer = "#' ```\n#' @md\n";
        assert_eq!(
            prose_texts(closer),
            vec![(TokKind::RoxygenMdFence, "```".into())]
        );
        assert_lossless(closer);
    }

    #[test]
    fn md_fence_off_without_md_directive() {
        assert_eq!(
            prose_texts("#' ```r\n"),
            vec![(TokKind::RoxygenText, "```r".into())]
        );
    }

    #[test]
    fn md_fence_requires_three_backticks_and_no_inner_backtick() {
        let two = "#' `` not a fence\n#' @md\n";
        assert_eq!(
            prose_texts(two),
            vec![(TokKind::RoxygenText, "`` not a fence".into())]
        );
        let inline = "#' ```code``` inline\n#' @md\n";
        assert_eq!(
            prose_texts(inline),
            vec![
                (TokKind::RoxygenMdCode, "```code```".into()),
                (TokKind::RoxygenText, " inline".into()),
            ]
        );
        assert_lossless(inline);
    }

    #[test]
    fn md_inline_off_without_md_directive() {
        assert_eq!(
            prose_texts("#' a *one* and `code` end\n"),
            vec![
                (TokKind::RoxygenText, "a *one* and ".into()),
                (TokKind::RoxygenCode, "`code`".into()),
                (TokKind::RoxygenText, " end".into()),
            ]
        );
    }

    #[test]
    fn md_emphasis_flanking_rejects_false_positives() {
        let src = "#' a * b * c and snake_case_name here\n#' @md\n";
        assert_eq!(
            prose_texts(src),
            vec![(
                TokKind::RoxygenText,
                "a * b * c and snake_case_name here".into(),
            )]
        );
        assert_lossless(src);
    }

    #[test]
    fn inline_code_multi_backtick_fence() {
        assert_eq!(
            prose_texts("#' ``a `b` c`` end\n"),
            vec![
                (TokKind::RoxygenCode, "``a `b` c``".into()),
                (TokKind::RoxygenText, " end".into()),
            ]
        );
        assert_lossless("#' ``a `b` c`` end\n");
    }

    #[test]
    fn rd_macro_span() {
        assert_eq!(
            prose_texts("#' See \\code{f} here\n"),
            vec![
                (TokKind::RoxygenText, "See ".into()),
                (TokKind::RoxygenRdMacro, "\\code{f}".into()),
                (TokKind::RoxygenText, " here".into()),
            ]
        );
        assert_lossless("#' See \\code{f} here\n");
    }

    #[test]
    fn rd_macro_with_pkg_option() {
        assert_eq!(
            prose_texts("#' \\link[pkg]{f}\n"),
            vec![(TokKind::RoxygenRdMacro, "\\link[pkg]{f}".into())]
        );
        assert_lossless("#' \\link[pkg]{f}\n");
    }

    #[test]
    fn rd_macro_nested_braces() {
        assert_eq!(
            prose_texts("#' \\code{f(g())} x\n"),
            vec![
                (TokKind::RoxygenRdMacro, "\\code{f(g())}".into()),
                (TokKind::RoxygenText, " x".into()),
            ]
        );
        assert_lossless("#' \\code{f(g())} x\n");
    }

    #[test]
    fn md_inline_link() {
        assert_eq!(
            prose_texts("#' see [the docs](https://x.y) now\n#' @md\n"),
            vec![
                (TokKind::RoxygenText, "see ".into()),
                (TokKind::RoxygenMdLink, "[the docs](https://x.y)".into()),
                (TokKind::RoxygenText, " now".into()),
            ]
        );
        assert_lossless("#' see [the docs](https://x.y) now\n#' @md\n");
    }

    #[test]
    fn md_function_autolink() {
        assert_eq!(
            prose_texts("#' Call [func()] and [pkg::g()].\n#' @md\n"),
            vec![
                (TokKind::RoxygenText, "Call ".into()),
                (TokKind::RoxygenMdLink, "[func()]".into()),
                (TokKind::RoxygenText, " and ".into()),
                (TokKind::RoxygenMdLink, "[pkg::g()]".into()),
                (TokKind::RoxygenText, ".".into()),
            ]
        );
        assert_lossless("#' Call [func()] and [pkg::g()].\n#' @md\n");
    }

    #[test]
    fn md_url_autolink() {
        assert_eq!(
            prose_texts("#' see <https://x.y/a> and <p>lit</p>\n#' @md\n"),
            vec![
                (TokKind::RoxygenText, "see ".into()),
                (TokKind::RoxygenMdLink, "<https://x.y/a>".into()),
                (TokKind::RoxygenText, " and ".into()),
                (TokKind::RoxygenMdHtml, "<p>".into()),
                (TokKind::RoxygenText, "lit".into()),
                (TokKind::RoxygenMdHtml, "</p>".into()),
            ]
        );
        assert_lossless("#' see <https://x.y/a> and <p>lit</p>\n#' @md\n");
    }

    #[test]
    fn md_html_inline_tag() {
        assert_eq!(
            prose_texts("#' before-<img src='foo.png'>-after\n#' @md\n"),
            vec![
                (TokKind::RoxygenText, "before-".into()),
                (TokKind::RoxygenMdHtml, "<img src='foo.png'>".into()),
                (TokKind::RoxygenText, "-after".into()),
            ]
        );
        assert_lossless("#' before-<img src='foo.png'>-after\n#' @md\n");
    }

    #[test]
    fn html_inline_is_literal_text_without_md() {
        assert_eq!(
            prose_texts("#' before-<img src='foo.png'>-after\n"),
            vec![(
                TokKind::RoxygenText,
                "before-<img src='foo.png'>-after".into()
            )]
        );
        assert_lossless("#' before-<img src='foo.png'>-after\n");
    }

    #[test]
    fn malformed_html_stays_literal() {
        assert_eq!(
            prose_texts("#' x <a b=> y\n#' @md\n"),
            vec![(TokKind::RoxygenText, "x <a b=> y".into())]
        );
        assert_lossless("#' x <a b=> y\n#' @md\n");
    }

    #[test]
    fn autolink_shape_is_literal_text_without_md() {
        assert_eq!(
            prose_texts("#' see <https://x.y/a> now\n"),
            vec![(TokKind::RoxygenText, "see <https://x.y/a> now".into())]
        );
        assert_lossless("#' see <https://x.y/a> now\n");
    }

    #[test]
    fn md_reference_link() {
        assert_eq!(
            prose_texts("#' a [text][ref] b\n#' @md\n"),
            vec![
                (TokKind::RoxygenText, "a ".into()),
                (TokKind::RoxygenMdLink, "[text][ref]".into()),
                (TokKind::RoxygenText, " b".into()),
            ]
        );
        assert_lossless("#' a [text][ref] b\n#' @md\n");
    }

    #[test]
    fn link_shape_is_literal_text_without_md() {
        assert_eq!(
            prose_texts("#' see [the docs](https://x.y) now\n"),
            vec![(
                TokKind::RoxygenText,
                "see [the docs](https://x.y) now".into()
            )]
        );
        assert_lossless("#' see [the docs](https://x.y) now\n");
    }

    #[test]
    fn bracketed_prose_is_literal_without_md() {
        assert_eq!(
            prose_texts("#' see [1] and [a note]\n"),
            vec![(TokKind::RoxygenText, "see [1] and [a note]".into())]
        );
        assert_lossless("#' see [1] and [a note]\n");
    }

    #[test]
    fn md_shortcut_link() {
        assert_eq!(
            prose_texts("#' see [note], [see this], [pkg::obj] but [x]{y}\n#' @md\n"),
            vec![
                (TokKind::RoxygenText, "see ".into()),
                (TokKind::RoxygenMdLink, "[note]".into()),
                (TokKind::RoxygenText, ", ".into()),
                (TokKind::RoxygenMdLink, "[see this]".into()),
                (TokKind::RoxygenText, ", ".into()),
                (TokKind::RoxygenMdLink, "[pkg::obj]".into()),
                (TokKind::RoxygenText, " but [x]{y}".into()),
            ]
        );
        assert_lossless("#' see [note], [see this], [pkg::obj] but [x]{y}\n#' @md\n");
    }

    #[test]
    fn unterminated_code_stays_prose() {
        assert_eq!(
            prose_texts("#' a ` b c\n"),
            vec![(TokKind::RoxygenText, "a ` b c".into())]
        );
        assert_lossless("#' a ` b c\n");
    }

    #[test]
    fn unbalanced_macro_stays_prose() {
        assert_eq!(
            prose_texts("#' \\code{ oops\n"),
            vec![(TokKind::RoxygenText, "\\code{ oops".into())]
        );
        assert_lossless("#' \\code{ oops\n");
    }

    #[test]
    fn backslash_without_name_stays_prose() {
        assert_eq!(
            prose_texts("#' a \\\\ b \\{ c\n"),
            vec![(TokKind::RoxygenText, "a \\\\ b \\{ c".into())]
        );
        assert_lossless("#' a \\\\ b \\{ c\n");
    }

    #[test]
    fn spans_inside_tag_prose() {
        assert_eq!(
            prose_texts("#' @param x A \\code{value} to use\n"),
            vec![
                (TokKind::RoxygenText, "A ".into()),
                (TokKind::RoxygenRdMacro, "\\code{value}".into()),
                (TokKind::RoxygenText, " to use".into()),
            ]
        );
        assert_lossless("#' @param x A \\code{value} to use\n");
    }

    #[test]
    fn mixed_inline_markup_is_lossless() {
        assert_lossless("#' Use `x`, \\link[base]{sum}, and [g()] per [d](u).\n");
    }

    #[test]
    fn utf8_prose_around_spans_is_lossless() {
        assert_lossless("#' café `x` naïve \\code{f} résumé\n");
    }

    /// Lexes every ordered triple of the fragments below and checks that the
    /// token texts always concatenate back to the input.
    #[test]
    fn prose_recognizers_round_trip_exhaustively() {
        let frags = [
            "a ",
            "`x`",
            "`",
            "``",
            "\\code{f}",
            "\\code{",
            "\\",
            "\\\\",
            "[g()]",
            "[d](u)",
            "[",
            "]",
            "[1]",
            "{",
            "}",
            "café ",
            " ",
            "::",
            "()",
        ];
        for &a in &frags {
            for &b in &frags {
                for &c in &frags {
                    let input = format!("#' {a}{b}{c}\n");
                    let joined: String = lex(&input).into_iter().map(|t| t.text).collect();
                    assert_eq!(joined, input, "not lossless for {input:?}");
                }
            }
        }
    }
}