use crate::internal::*;
use crate::variants::token::*;
/// One line of lexed input: a queue of tokens plus bookkeeping recorded while
/// the line is being built with `push`.
#[derive(Debug, Clone)]
pub struct Line<'arena> {
/// Tokens currently held by the line; consuming removes from the front.
tokens: Deq<'arena, Token<'arena>>,
/// Token count as built by `new`/`push`; compared with the current count to
/// tell whether any tokens have been consumed.
orig_len: u32,
/// Set when a `TermDelimiter` token has been pushed.
term_delim: bool,
/// Set when a `MacroName` token with lexeme `pass:` has been pushed.
pass_macro: bool,
/// Saturating count of qualifying `Plus` tokens seen by `push`.
pass_pluses: u8,
}
impl<'arena> Line<'arena> {
pub fn new(tokens: Deq<'arena, Token<'arena>>) -> Self {
Line {
orig_len: tokens.len() as u32,
tokens,
term_delim: false,
pass_macro: false,
pass_pluses: 0,
}
}
pub fn empty(bump: &'arena Bump) -> Self {
Line {
orig_len: 0,
tokens: Deq::new(bump),
term_delim: false,
pass_macro: false,
pass_pluses: 0,
}
}
pub fn with_capacity(capacity: usize, bump: &'arena Bump) -> Self {
Line {
orig_len: 0,
tokens: Deq::with_capacity(capacity, bump),
term_delim: false,
pass_macro: false,
pass_pluses: 0,
}
}
/// Cheap pre-check, based on the flags recorded by `push`, for whether the
/// line could hold an inline passthrough.
///
/// - `true` if a `pass:` macro name was pushed or more than one qualifying
///   plus was counted
/// - `false` if neither a `pass:` macro name nor any plus was recorded
/// - with exactly one plus: `false` only when the last token is a single
///   `Plus` whose preceding token is whitespace-ish; otherwise `true`
///
/// # Panics
///
/// Panics in the exactly-one-plus case if the line currently has no tokens.
pub fn may_contain_inline_pass(&self) -> bool {
  if self.pass_macro || self.pass_pluses > 1 {
    return true;
  }
  if self.pass_pluses == 0 {
    return false;
  }
  // Exactly one plus was counted and no `pass:` macro was seen.
  let last = self.last().unwrap();
  let lone_trailing_plus = last.is_kind_len(Plus, 1) && self.len() > 1;
  !lone_trailing_plus || !self.nth_token(self.len() - 2).is_whitespaceish()
}
/// Appends `token` to the end of the line, updating the bookkeeping flags.
///
/// - a `MacroName` token whose lexeme is `pass:` sets `pass_macro`
/// - a `TermDelimiter` token sets `term_delim`
/// - a `MaybeEmail` token is handed to `finalize_email`, which does its own
///   storing and `orig_len` accounting, so this method returns early
/// - a `Plus` token with length below 4, when the current last token is not
///   reported as a `Backtick`, bumps `pass_pluses` (saturating)
///
/// All other paths store the token and increment `orig_len`.
pub fn push(&mut self, token: Token<'arena>) {
match token.kind {
MacroName if token.lexeme == "pass:" => self.pass_macro = true,
TermDelimiter => self.term_delim = true,
MaybeEmail => return self.finalize_email(token),
Plus if self.tokens.last().not_kind(Backtick) && token.len() < 4 => {
self.pass_pluses = self.pass_pluses.saturating_add(1)
}
_ => {}
}
self.tokens.push(token);
self.orig_len += 1;
}
/// Puts `token` back at the front of the line via `Deq::restore_front`.
/// `orig_len` is left unchanged.
pub fn restore_front(&mut self, token: Token<'arena>) {
self.tokens.restore_front(token);
}
/// Resolves a `MaybeEmail` token handed over by `push`.
///
/// The token is either stored on its own (as `Email` if `regx::EMAIL_RE`
/// matches its lexeme, otherwise as `Word`), or merged with a run of directly
/// preceding `Word`/`Digits`/`Dots`/`Underscore`/`Plus` tokens into a single
/// `Email` token when the concatenated text matches `regx::EMAIL_RE`.
fn finalize_email(&mut self, mut token: Token<'arena>) {
// Nothing precedes the token: classify it by its own lexeme.
if self.tokens.is_empty() {
if regx::EMAIL_RE.is_match(&token.lexeme) {
token.kind = Email;
} else {
token.kind = Word;
}
self.tokens.push(token);
self.orig_len += 1;
return;
}
// Walk backwards from the last stored token over email-ish tokens. The walk
// stops at a `Dots` token longer than 1, at any other kind, or on reaching
// index 0 (the token at index 0 is not examined by this loop).
let init_idx = self.tokens.len() - 1;
let mut idx = init_idx;
while idx > 0 {
let Some(prev_token) = self.tokens.get(idx) else {
break;
};
match prev_token.kind {
Dots if prev_token.len() > 1 => break,
Word | Digits | Dots | Underscore | Plus => {
idx -= 1;
}
_ => break,
}
}
// The walk did not move: classify the token on its own.
// NOTE(review): this is also the case when exactly one token precedes
// (`init_idx == 0`), so a single leading email-ish token is never merged —
// confirm that is intended.
if idx == init_idx {
if regx::EMAIL_RE.is_match(&token.lexeme) {
token.kind = Email;
} else {
token.kind = Word;
}
self.tokens.push(token);
self.orig_len += 1;
return;
}
// `idx` is where the walk stopped. If that token is not one of the email-ish
// kinds, the candidate starts one token later.
// NOTE(review): a multi-char `Dots` token that stopped the walk still matches
// `Dots` here and is therefore included in the candidate — confirm.
let mut end_idx = idx;
if !matches!(
self.tokens.get(end_idx).unwrap().kind,
Word | Digits | Dots | Underscore | Plus
) {
end_idx += 1;
}
// Concatenate the candidate tokens' lexemes followed by the new token's.
let start = self.tokens.get(end_idx).unwrap().loc.start;
let mut maybe_email = String::with_capacity((token.loc.end - start) as usize);
for t in self.tokens.iter().skip(end_idx) {
maybe_email.push_str(&t.lexeme);
}
maybe_email.push_str(&token.lexeme);
// The combined text is not an email: store the token as a plain `Word`.
if !regx::EMAIL_RE.is_match(&maybe_email) {
token.kind = Word;
self.tokens.push(token);
self.orig_len += 1;
return;
}
// Rewrite the first candidate token in place to hold the whole email, drop
// the tokens after it, and reset `orig_len` to the resulting token count.
let start_token = self.tokens.get_mut(end_idx).unwrap();
start_token.lexeme.clear();
start_token.loc.end = token.loc.end;
start_token.lexeme.push_str(&maybe_email);
start_token.kind = Email;
self.tokens.truncate(end_idx + 1);
self.orig_len = (end_idx + 1) as u32;
}
/// Appends `token` and increments `orig_len`, skipping the flag tracking and
/// email handling that `push` performs.
pub fn push_nonpass(&mut self, token: Token<'arena>) {
self.tokens.push(token);
self.orig_len += 1;
}
/// Returns the last token of the line, if any.
pub fn last(&self) -> Option<&Token<'arena>> {
self.tokens.last()
}
/// Removes and returns the last token, if any. `orig_len` is not adjusted.
pub fn pop(&mut self) -> Option<Token<'arena>> {
self.tokens.pop()
}
/// Consumes the line, appending all of its remaining tokens to `tokens`.
pub fn drain_into(self, tokens: &mut Deq<'arena, Token<'arena>>) {
tokens.extend(self.tokens.into_iter());
}
/// Consumes the line and returns the bytes of every remaining token's lexeme,
/// concatenated in order, in a vector allocated from the line's arena.
pub fn into_bytes(self) -> BumpVec<'arena, u8> {
  let tokens = &self.tokens;
  let mut bytes = BumpVec::new_in(tokens.bump);
  // Pre-size from the source span covered by the first and last tokens.
  if let Some((first, last)) = tokens.first().zip(tokens.last()) {
    let span = last.loc.end.saturating_sub(first.loc.start);
    bytes.reserve(span as usize);
  }
  for token in tokens.iter() {
    bytes.extend_from_slice(token.lexeme.as_bytes());
  }
  bytes
}
/// Returns the bump arena backing this line's token queue.
pub const fn bump_arena(&self) -> &'arena Bump {
self.tokens.bump
}
/// Whether two lines hold the same source text token-for-token: equal token
/// counts, equal total lexeme length, and pairwise-equal lexemes. Token kinds
/// and locations are not compared.
pub fn src_eq(&self, other: &Self) -> bool {
  self.tokens.len() == other.tokens.len()
    && self.src_len() == other.src_len()
    && self
      .tokens
      .iter()
      .zip(other.tokens.iter())
      .all(|(mine, theirs)| mine.lexeme == theirs.lexeme)
}
/// Returns the token at the front of the line (index 0), if any.
pub fn current_token(&self) -> Option<&Token<'arena>> {
self.tokens.get(0)
}
/// Returns a mutable reference to the token at the front of the line, if any.
pub fn current_token_mut(&mut self) -> Option<&mut Token<'arena>> {
self.tokens.get_mut(0)
}
/// Returns the token just after the current one (index 1), if any.
pub fn peek_token(&self) -> Option<&Token<'arena>> {
self.tokens.get(1)
}
/// Returns the last token of the line, if any (same lookup as `last`).
pub fn last_token(&self) -> Option<&Token<'arena>> {
self.tokens.last()
}
/// Returns the token at zero-based index `n`, or `None` if out of range.
pub fn nth_token(&self, n: usize) -> Option<&Token<'arena>> {
self.tokens.get(n)
}
/// Number of tokens currently held by the line.
pub fn num_tokens(&self) -> usize {
self.tokens.len()
}
/// Number of tokens in the line, not counting a final `Whitespace` token when
/// there is one.
pub fn num_tokens_before_trailing_whitespace(&self) -> usize {
  let total = self.num_tokens();
  if self.last_token().kind(Whitespace) {
    total - 1
  } else {
    total
  }
}
/// Whether the current (front) token has kind `kind`.
pub fn current_is(&self, kind: TokenKind) -> bool {
self.current_token().kind(kind)
}
/// Whether the current (front) token satisfies `spec`.
pub fn current_satisfies(&self, spec: TokenSpec) -> bool {
self.current_token().satisfies(spec)
}
/// Returns the zero-based heading level implied by the line's marker, or
/// `None` if the line is not shaped like a heading.
///
/// Two forms are recognised, each needing more than two tokens:
/// - an `EqualSigns` token followed by `Whitespace`: the level is the length of
///   the equal-signs lexeme minus one
/// - leading `Hash` tokens followed by `Whitespace`: the level is the number of
///   leading `Hash` tokens minus one
pub fn unadjusted_heading_level(&self) -> Option<u8> {
  if self.starts_with_seq(&[Kind(EqualSigns), Kind(Whitespace)]) && self.num_tokens() > 2 {
    let marker = self.current_token().unwrap();
    return Some((marker.lexeme.len() - 1) as u8);
  }

  let maybe_hash_heading = self.starts(Hash)
    && self.contains_seq(&[Kind(Hash), Kind(Whitespace)])
    && self.num_tokens() > 2;
  if !maybe_hash_heading {
    return None;
  }

  // At least one leading `Hash` exists here because the line starts with one.
  let num_hashes = self.tokens.iter().take_while(|t| t.kind == Hash).count();
  let space_then_content =
    self.num_tokens() > num_hashes + 1 && self.nth_token(num_hashes).kind(Whitespace);
  space_then_content.then(|| (num_hashes - 1) as u8)
}
/// Whether the line currently holds no tokens.
pub fn is_empty(&self) -> bool {
self.tokens.is_empty()
}
/// Whether the line has no tokens, or only whitespace-ish tokens.
pub fn is_emptyish(&self) -> bool {
  // `all` is vacuously true for a line without tokens.
  self.tokens.iter().all(|token| token.is_whitespaceish())
}
/// Whether `unadjusted_heading_level` recognises this line as a heading.
pub fn is_heading(&self) -> bool {
self.unadjusted_heading_level().is_some()
}
/// Whether the line is shaped like a block macro: it starts with `MacroName`
/// then `Colon`, its first token `can_start_block_macro`, it contains an
/// `OpenBracket`, and it ends with a non-escaped `CloseBracket`.
pub fn is_block_macro(&self) -> bool {
self.starts_with_seq(&[Kind(MacroName), Kind(Colon)])
&& self.current_token().can_start_block_macro()
&& self.contains(OpenBracket)
&& self.ends_with_nonescaped(CloseBracket)
}
/// Whether the line is shaped like a block attribute list: nothing has been
/// consumed, it starts with `OpenBracket`, ends with a non-escaped
/// `CloseBracket`, and its second token is not another `OpenBracket`.
pub fn is_block_attr_list(&self) -> bool {
self.is_fully_unconsumed()
&& self.starts(OpenBracket)
&& self.ends_with_nonescaped(CloseBracket)
&& !self.peek_token().kind(OpenBracket)
}
/// Whether the line is shaped like a block anchor (`[[...]]`).
///
/// Requires two leading `OpenBracket` tokens, more than four tokens, a third
/// token that is not `SingleQuote`, `DoubleQuote` or `Whitespace`, a
/// non-escaped final `CloseBracket` preceded by another `CloseBracket`, and
/// either more than five tokens or a `Word` as the third token.
pub fn is_block_anchor(&self) -> bool {
self.starts_with_seq(&[Kind(OpenBracket); 2])
&& self.num_tokens() > 4
&& !matches!(
// index 2 exists: the line has more than four tokens at this point
self.nth_token(2).unwrap().kind,
SingleQuote | DoubleQuote | Whitespace
)
&& self.ends_with_nonescaped(CloseBracket)
&& self.nth_token(self.num_tokens() - 2).kind(CloseBracket)
&& (self.len() > 5 || self.nth_token(2).kind(Word))
}
/// Whether the line is shaped like a title line: it starts with a `Dots` token
/// of length 1, has more than one token, its second token is not `Whitespace`,
/// and nothing has been consumed from it.
pub fn is_chunk_title(&self) -> bool {
self.current_satisfies(Len(1, Dots))
&& self.iter().len() > 1
&& self.peek_token().satisfies(Not(Whitespace))
&& self.is_fully_unconsumed()
}
/// Whether the line is exactly one token that converts to `delimiter`.
pub fn is_delimiter(&self, delimiter: Delimiter) -> bool {
self.num_tokens() == 1 && self.current_token().unwrap().to_delimiter() == Some(delimiter)
}
/// Whether the line is exactly one token that converts to a delimiter whose
/// `kind` equals `delimiter_kind`.
pub fn is_delimiter_kind(&self, delimiter_kind: DelimiterKind) -> bool {
self.num_tokens() == 1
&& self.current_token().unwrap().to_delimiter().map(|d| d.kind) == Some(delimiter_kind)
}
/// Whether the line is exactly one token for which `to_delimiter_kind`
/// returns a value.
pub fn is_any_delimiter(&self) -> bool {
self.num_tokens() == 1 && self.current_token().unwrap().to_delimiter_kind().is_some()
}
/// Whether the line is a delimiter of kind `DelimiterKind::Comment`.
pub fn is_comment_block_delimiter(&self) -> bool {
self.is_delimiter_kind(DelimiterKind::Comment)
}
/// Whether the line starts with a `Whitespace` token followed by at least one
/// more token.
pub fn is_indented(&self) -> bool {
self.starts(Whitespace) && self.num_tokens() > 1
}
/// Consumes and drops up to `n` tokens from the front of the line. Running out
/// of tokens is not an error; the remaining iterations are no-ops.
pub fn discard(&mut self, n: usize) {
for _ in 0..n {
_ = self.consume_current();
}
}
/// Consumes the front token, asserting that it has kind `kind`, and returns it.
///
/// # Panics
///
/// Panics if the line is empty or the consumed token has a different kind.
pub fn discard_assert(&mut self, kind: TokenKind) -> Token<'arena> {
let token = self.consume_current().unwrap();
assert!(
token.kind == kind,
"expected token kind {:?}, found {:?}",
kind,
token.kind
);
token
}
/// Removes and returns the last token, if any (same operation as `pop`).
pub fn discard_last(&mut self) -> Option<Token<'arena>> {
self.tokens.pop()
}
/// Removes the last token, checking that it has kind `kind`.
///
/// The check is a `debug_assert!`, so it (and the `unwrap` inside it) only
/// runs in builds with debug assertions enabled.
pub fn discard_assert_last(&mut self, kind: TokenKind) {
let token = self.discard_last();
debug_assert!(token.unwrap().kind(kind));
}
/// Whether the line contains a token of kind `token_type` that is not directly
/// preceded by a `Backslash` token.
pub fn contains_nonescaped(&self, token_type: TokenKind) -> bool {
self.first_nonescaped(token_type).is_some()
}
/// Whether the final token has kind `token_type` and the token before it (if
/// there is one) is not a `Backslash`.
pub fn ends_with_nonescaped(&self, token_type: TokenKind) -> bool {
match self.iter().len() {
0 => false,
// a single token has nothing before it that could escape it
1 => self.current_is(token_type),
n => self.last_token().kind(token_type) && self.nth_token(n - 2).not_kind(Backslash),
}
}
/// Number of tokens currently held by the line (same value as `num_tokens`).
pub fn len(&self) -> usize {
self.tokens.len()
}
/// Iterates over the line's tokens by reference, front to back.
pub fn iter(&self) -> impl ExactSizeIterator<Item = &Token<'arena>> {
self.tokens.iter()
}
/// Iterates over the line's tokens by mutable reference.
pub fn iter_mut(&mut self) -> impl ExactSizeIterator<Item = &mut Token<'arena>> {
self.tokens.iter_mut()
}
/// Consumes the line, yielding its tokens by value.
pub fn into_iter(self) -> impl ExactSizeIterator<Item = Token<'arena>> {
self.tokens.into_iter()
}
/// Finds the first token of kind `kind` that is not directly preceded by a
/// `Backslash` token, returning the token together with its index.
pub fn first_nonescaped(&self, kind: TokenKind) -> Option<(&Token<'_>, usize)> {
  // Tracks whether the token just before the current one was a backslash.
  let mut after_backslash = false;
  for (idx, token) in self.iter().enumerate() {
    if !after_backslash && token.kind(kind) {
      return Some((token, idx));
    }
    after_backslash = token.kind == Backslash;
  }
  None
}
/// Whether the tokens starting at index `offset` satisfy `specs`, one spec per
/// consecutive token. An empty `specs`, or one that would run past the end of
/// the line, yields `false`.
pub fn has_seq_at(&self, specs: &[TokenSpec], offset: u32) -> bool {
  let offset = offset as usize;
  if specs.is_empty() || self.len() < offset + specs.len() {
    return false;
  }
  // The length check above guarantees every index below is in range.
  specs.iter().enumerate().all(|(i, spec)| {
    self
      .tokens
      .get(offset + i)
      .is_some_and(|token| token.satisfies(*spec))
  })
}
/// Whether any token in the line has kind `kind`.
pub fn contains(&self, kind: TokenKind) -> bool {
self.iter().any(|t| t.kind == kind)
}
/// Whether any token in the line has kind `kind` and a lexeme exactly `len`
/// bytes long.
pub fn contains_len(&self, kind: TokenKind, len: usize) -> bool {
self.iter().any(|t| t.kind == kind && t.lexeme.len() == len)
}
/// Whether the first token has kind `kind` (alias of `current_is`).
pub fn starts(&self, kind: TokenKind) -> bool {
self.current_is(kind)
}
/// Whether the line has a first token and `predicate` returns `true` for it.
pub fn starts_with(&self, predicate: impl Fn(&Token<'arena>) -> bool) -> bool {
  self.current_token().is_some_and(predicate)
}
/// Whether the line is a line comment: nothing has been consumed from it and
/// its first token is a `ForwardSlashes` token of length 2.
pub fn is_comment(&self) -> bool {
self.is_fully_unconsumed() && self.current_satisfies(Len(2, ForwardSlashes))
}
/// Whether the last token has kind `kind`.
pub fn ends(&self, kind: TokenKind) -> bool {
self.last_token().kind(kind)
}
/// Whether the line's leading tokens satisfy `tokens`, one spec per token.
pub fn starts_with_seq(&self, tokens: &[TokenSpec]) -> bool {
self.has_seq_at(tokens, 0)
}
/// Whether `specs` matches a run of consecutive tokens anywhere in the line.
///
/// # Panics
///
/// Panics if `specs` is empty (asserted in `index_of_seq_from`).
pub fn contains_seq(&self, specs: &[TokenSpec]) -> bool {
self.index_of_seq(specs).is_some()
}
/// Index of the first token of kind `kind`, if any.
pub fn index_of_kind(&self, kind: TokenKind) -> Option<usize> {
self.iter().position(|t| t.kind(kind))
}
/// Index of the first token at which `specs` matches a run of consecutive
/// tokens, searching from the start of the line.
///
/// # Panics
///
/// Panics if `specs` is empty (asserted in `index_of_seq_from`).
pub fn index_of_seq(&self, specs: &[TokenSpec]) -> Option<usize> {
self.index_of_seq_from(specs, 0)
}
/// Index of the first token at or after `from` where `specs` matches a run of
/// consecutive tokens, or `None` if there is no such position.
///
/// # Panics
///
/// Panics if `specs` is empty.
fn index_of_seq_from(&self, specs: &[TokenSpec], from: usize) -> Option<usize> {
  assert!(!specs.is_empty());
  let len = self.len();
  if len < specs.len() {
    return None;
  }
  let (first_spec, rest) = specs.split_first().unwrap();
  for start in from..len {
    if !self.tokens.get(start).unwrap().satisfies(*first_spec) {
      continue;
    }
    // Too few tokens remain for the whole sequence; later starts have fewer still.
    if len - start < specs.len() {
      return None;
    }
    let rest_matches = rest
      .iter()
      .enumerate()
      .all(|(j, spec)| self.tokens.get(start + j + 1).unwrap().satisfies(*spec));
    if rest_matches {
      return Some(start);
    }
  }
  None
}
/// Whether every remaining token is either `Whitespace` or a `CalloutNumber`
/// (vacuously `true` for a line with no tokens).
pub fn continues_valid_callout_nums(&self) -> bool {
  self
    .iter()
    .all(|token| token.kind(Whitespace) || token.kind(CalloutNumber))
}
/// Whether the rest of the line could complete an inline macro whose name
/// token was `prev`.
///
/// Requires that the line does not start with `Whitespace`, contains an
/// `OpenBracket`, and contains a `CloseBracket` preceded by a non-`Backslash`
/// token. A leading `Colon` is only accepted when `prev` is `xref:`.
pub fn continues_inline_macro(&self, prev: &Token) -> bool {
if self.current_is(Whitespace) {
return false;
}
let Some(open_idx) = self.index_of_kind(OpenBracket) else {
return false;
};
// `end_idx` is the index of the non-backslash token *before* the first such
// close bracket, not of the bracket itself.
let Some(end_idx) = self.index_of_seq(&[Not(Backslash), Kind(CloseBracket)]) else {
return false;
};
if end_idx < open_idx {
false
} else {
!self.current_is(Colon) || prev.lexeme.as_str() == "xref:"
}
}
/// Whether the rest of the line could complete an xref shorthand: it starts
/// with `LessThan`, has more than three tokens, contains two consecutive
/// `GreaterThan` tokens, and its second token is none of `GreaterThan`,
/// `LessThan` or `Whitespace`.
pub fn continues_xref_shorthand(&self) -> bool {
self.current_is(LessThan)
&& self.num_tokens() > 3
&& self.contains_seq(&[Kind(GreaterThan), Kind(GreaterThan)])
&& self.nth_token(1).not_kind(GreaterThan)
&& self.nth_token(1).not_kind(LessThan)
&& self.nth_token(1).not_kind(Whitespace)
}
/// Whether a token of kind `kind` is reached before any `Whitespace` token.
///
/// The token directly following an `AttrRef` whose lexeme is `{sp}` is skipped
/// without being examined. Returns `false` if the line ends before `kind` is
/// found.
pub fn no_whitespace_until(&self, kind: TokenKind) -> bool {
  let mut tokens = self.iter();
  while let Some(token) = tokens.next() {
    if token.kind(kind) {
      return true;
    }
    if token.kind(AttrRef) && token.lexeme == "{sp}" {
      // Step over whatever follows the `{sp}` reference.
      tokens.next();
      continue;
    }
    if token.kind(Whitespace) {
      return false;
    }
  }
  false
}
/// Whether `terminates_constrained_in` finds a terminating position for
/// `stop_tokens` in this line.
pub fn terminates_constrained(&self, stop_tokens: &[TokenSpec], ctx: &InlineCtx) -> bool {
self.terminates_constrained_in(stop_tokens, ctx).is_some()
}
/// Finds the index at which `stop_tokens` terminates a constrained inline span
/// within this line, or `None` if no valid terminator is found.
///
/// # Panics
///
/// Panics if `stop_tokens` is empty (asserted in `index_of_seq_from`).
pub fn terminates_constrained_in(
&self,
stop_tokens: &[TokenSpec],
ctx: &InlineCtx,
) -> Option<usize> {
let mut search_from = 0;
loop {
// A match at index 0 (nothing before it) or no match at all ends the search.
let n = match self.index_of_seq_from(stop_tokens, search_from) {
Some(0) | None => return None,
Some(n) => n,
};
let next = self.nth_token(n + 1);
let prev = self.nth_token(n - 1);
// Single stop token directly preceded by a token of the same kind (that is
// not an attr replacement): skip this candidate and look further right.
if stop_tokens.len() == 1
&& prev.is_some_and(|t| stop_tokens[0] == Kind(t.kind) && !t.is_attr_replacement())
{
search_from = n + 1;
continue;
}
// A following `Word`, or a whitespace-ish token before the candidate,
// rules out termination for the whole line.
if next.kind(Word) || prev.is_whitespaceish() {
return None;
}
// The candidate is directly followed by the same formatting token (not an
// attr replacement): skip it and keep searching.
if let Some(token) = next
&& matches!(token.kind, Underscore | Star | Backtick | Hash)
&& stop_tokens == [Kind(token.kind)]
&& !token.is_attr_replacement()
{
search_from = n + 1;
continue;
}
// A backtick followed by a single quote is not accepted as a terminator.
if next.kind(SingleQuote) && stop_tokens == [Kind(Backtick)] {
return None;
}
// If the context supplies specs that match earlier in the line (at a token
// that is not an attr replacement), the candidate is rejected.
return match ctx.specs() {
Some(specs) => self.index_of_seq(specs).map_or(Some(n), |m| {
if m < n && !self.nth_token(m).is_attr_replacement() {
None
} else {
Some(n)
}
}),
None => Some(n),
};
}
}
/// If the first `CloseParens` token in the line is at least two characters
/// long, returns that token's length; otherwise returns `None`.
///
/// Only the first `CloseParens` token is inspected.
pub fn terminates_index_term(&self) -> Option<usize> {
  let n = self.index_of_seq(&[Kind(CloseParens)])?;
  let close_parens = self.nth_token(n)?;
  // NOTE(review): the previous implementation checked whether the preceding
  // token was a `Backslash`, but both outcomes returned the same value, so an
  // escaped `\))` has never been treated specially. The dead branch is removed
  // here without changing results; confirm whether escaping should yield `None`.
  let len = close_parens.len();
  (len >= 2).then_some(len)
}
/// Consumes tokens from the front until one of kind `kind` is reached (that
/// token is left in place) or the line runs out, returning the collected text
/// with its source location.
///
/// `AttrRef` and `Backslash` tokens are consumed but their lexemes are not
/// appended; their locations still extend the returned location.
///
/// # Panics
///
/// Panics if the line has no tokens.
#[must_use]
pub fn consume_to_string_until(
&mut self,
kind: TokenKind,
bump: &'arena Bump,
) -> SourceString<'arena> {
let mut loc = self.first_loc().expect("no tokens to consume");
let mut s = BumpString::new_in(bump);
while let Some(token) = self.consume_if_not(kind) {
if token.kind != AttrRef && token.kind != Backslash {
s.push_str(&token.lexeme);
}
loc.extend(token.loc);
}
SourceString::new(s, loc)
}
/// Consumes tokens from the front until the next token satisfies any of `spec`
/// (that token is left in place) or the line runs out, returning the
/// concatenated lexemes with their source location.
///
/// # Panics
///
/// Panics if the line has no tokens.
#[must_use]
pub fn consume_to_string_until_one_of(
  &mut self,
  spec: &[TokenSpec],
  bump: &'arena Bump,
) -> SourceString<'arena> {
  let mut loc = self.first_loc().expect("no tokens to consume");
  let mut s = BumpString::new_in(bump);
  while self
    .current_token()
    .is_some_and(|next| !next.satisfies_any(spec))
  {
    let token = self.consume_current().unwrap();
    s.push_str(&token.lexeme);
    loc.extend(token.loc);
  }
  SourceString::new(s, loc)
}
/// Consumes every remaining token, returning the concatenated lexemes with the
/// source location they span.
///
/// # Panics
///
/// Panics if the line has no tokens.
#[must_use]
pub fn consume_to_string(&mut self, bump: &'arena Bump) -> SourceString<'arena> {
let mut loc = self.first_loc().expect("no tokens to consume");
let mut s = BumpString::new_in(bump);
while let Some(token) = self.consume_current() {
s.push_str(&token.lexeme);
loc.extend(token.loc);
}
SourceString::new(s, loc)
}
/// Consumes and returns the front token only if it has kind `kind`.
pub fn consume_if(&mut self, kind: TokenKind) -> Option<Token<'_>> {
match self.current_token() {
Some(token) if token.kind(kind) => self.consume_current(),
_ => None,
}
}
/// Consumes and returns the front token only if it does *not* have kind
/// `kind`. Returns `None` when the line is empty.
pub fn consume_if_not(&mut self, kind: TokenKind) -> Option<Token<'_>> {
match self.current_token() {
Some(token) if !token.kind(kind) => self.consume_current(),
_ => None,
}
}
/// Consumes everything up to the next `OpenBracket` as the macro target, then
/// consumes the bracket itself.
///
/// # Panics
///
/// Panics if the line has no tokens, or if no `OpenBracket` follows the target.
#[must_use]
pub fn consume_macro_target(&mut self, bump: &'arena Bump) -> SourceString<'arena> {
let target = self.consume_to_string_until(OpenBracket, bump);
self.discard_assert(OpenBracket);
target
}
/// Like `consume_macro_target`, but yields `None` when the line already starts
/// with `OpenBracket` (no target text). The bracket is consumed either way.
///
/// # Panics
///
/// Panics if the line has no tokens, or if no `OpenBracket` follows the target.
#[must_use]
pub fn consume_optional_macro_target(
  &mut self,
  bump: &'arena Bump,
) -> Option<SourceString<'arena>> {
  let target = if self.current_is(OpenBracket) {
    None
  } else {
    Some(self.consume_to_string_until(OpenBracket, bump))
  };
  self.discard_assert(OpenBracket);
  target
}
/// Consumes the tokens forming a URL from the front of the line and returns
/// the URL text with its source location.
///
/// - `start`: an already-consumed token whose lexeme is prepended to the URL
///   and whose location begins the result
/// - `stop`: when given, tokens are consumed up to (not including) the first
///   token of that kind; if no such token exists, nothing is consumed. When
///   `None`, consumption stops at the first token of a fixed set of
///   URL-terminating kinds, and a final `Dots` token is left in place.
///
/// # Panics
///
/// Panics if `start` is `None` and the line has no tokens.
#[must_use]
pub fn consume_url(
&mut self,
start: Option<&Token>,
stop: Option<TokenKind>,
bump: &'arena Bump,
) -> SourceString<'arena> {
let mut loc = start.map_or_else(|| self.first_loc().unwrap(), |t| t.loc);
// First pass: count how many tokens belong to the URL.
let mut num_tokens = 0;
if let Some(stop) = stop {
num_tokens = self.index_of_kind(stop).unwrap_or(0);
} else {
for token in self.iter() {
match token.kind {
Whitespace | GreaterThan | OpenBracket | OpenParens | CloseParens | Bang | SemiColon
| Colon | Star | QuestionMark => break,
_ => num_tokens += 1,
}
}
}
// Without an explicit stop, a trailing `Dots` token is not counted as part of the URL.
if stop.is_none() && num_tokens > 0 && self.tokens.get(num_tokens - 1).kind(Dots) {
num_tokens -= 1;
}
// Second pass: consume the counted tokens and build the string.
let mut s = BumpString::new_in(bump);
if let Some(start) = start {
s.push_str(&start.lexeme);
loc.extend(start.loc);
}
for _ in 0..num_tokens {
let token = self.consume_current().unwrap();
s.push_str(&token.lexeme);
loc.extend(token.loc);
}
SourceString::new(s, loc)
}
/// Removes and returns the token at the front of the line, if any.
/// `orig_len` is not changed, which is how consumption is later detected.
#[must_use]
pub fn consume_current(&mut self) -> Option<Token<'arena>> {
self.tokens.pop_front()
}
/// Wraps this single line in a `ContiguousLines`, allocating from the line's
/// own arena.
pub fn into_lines(self) -> ContiguousLines<'arena> {
let mut lines = Deq::with_capacity(1, self.tokens.bump);
lines.push(self);
ContiguousLines::new(lines)
}
/// Source location of the first token, if the line has one.
pub fn first_loc(&self) -> Option<SourceLocation> {
self.current_token().map(|t| t.loc)
}
/// Source location of the last token, if the line has one.
pub fn last_loc(&self) -> Option<SourceLocation> {
self.last_token().map(|t| t.loc)
}
/// Source location spanning from the first token to the last, or `None` for a
/// line with no tokens.
pub fn loc(&self) -> Option<SourceLocation> {
self
.first_loc()
.zip(self.last_loc())
.map(|(start, end)| SourceLocation::spanning(start, end))
}
/// Total byte length of all remaining tokens' lexemes (0 for an empty line).
pub fn src_len(&self) -> usize {
  // Summing over no tokens already yields 0, so no emptiness check is needed.
  self.tokens.iter().map(|token| token.lexeme.len()).sum()
}
/// Rebuilds the line's text by concatenating token lexemes, leaving out
/// `AttrRef` and `Discard` tokens. The string is allocated from the line's
/// arena with capacity for all lexemes.
pub fn reassemble_src(&self) -> BumpString<'arena> {
let mut src = BumpString::with_capacity_in(self.src_len(), self.tokens.bump);
for token in self
.tokens
.iter()
.filter(|t| !matches!(t.kind, AttrRef | Discard))
{
src.push_str(&token.lexeme);
}
src
}
/// Returns the list marker this line starts with, if it starts a list item.
///
/// One leading `Whitespace` token is skipped before looking for the marker.
/// Comment lines and lines starting with `EqualSigns` never yield a marker.
pub fn list_marker(&self) -> Option<ListMarker> {
if self.is_comment() || self.current_is(EqualSigns) {
return None;
}
// Skip a single leading whitespace token (indented list items).
let mut offset = 0;
if self.current_token().kind(Whitespace) {
offset += 1;
}
let token = self.nth_token(offset)?;
let second = self.nth_token(offset + 1);
let third = self.nth_token(offset + 2);
match token.kind {
// `* item`: a star, whitespace, then content — unless the line (ignoring
// trailing whitespace) is exactly star/space/star/space/star.
Star
if second.kind(Whitespace)
&& third.is_some()
&& !(self.num_tokens_before_trailing_whitespace() == offset + 5
&& third.kind(Star)
&& self.nth_token(offset + 3).kind(Whitespace)
&& self.nth_token(offset + 4).kind(Star)) =>
{
Some(ListMarker::Star(1))
}
// `. item`, `.. item`, ...: depth is the length of the dots token.
Dots if second.kind(Whitespace) && third.is_some() => {
Some(ListMarker::Dot(token.len() as u8))
}
// `- item`: a single dash, whitespace, then something other than a lone dash.
Dashes
if second.kind(Whitespace)
&& token.len() == 1
&& third.is_some_and(|t| !t.is_kind_len(Dashes, 1)) =>
{
Some(ListMarker::Dash)
}
// Repeated stars: depth comes from the first capture group of
// `regx::REPEAT_STAR_LI_START` applied to the reassembled source.
Star if second.kind(Star) => {
let src = self.reassemble_src();
let captures = regx::REPEAT_STAR_LI_START.captures(&src)?;
Some(ListMarker::Star(captures.get(1).unwrap().len() as u8))
}
// Callout number followed by whitespace; lexemes whose second byte is `!`
// are excluded.
CalloutNumber if second.kind(Whitespace) && token.lexeme.as_bytes()[1] != b'!' => {
Some(ListMarker::Callout(token.parse_callout_num()))
}
// `1. item`: digits, dots, whitespace; the number must fit in a `u16`.
Digits if second.kind(Dots) && third.kind(Whitespace) => {
token.lexeme.parse::<u16>().ok().map(ListMarker::Digits)
}
// A term delimiter in marker position is not a marker. Otherwise, when a
// term delimiter was pushed onto this line, the first one found decides the
// description-list marker.
TermDelimiter => None, _ if self.term_delim => {
for token in self.iter().skip(offset) {
if token.kind(TermDelimiter) {
return match token.lexeme.as_str() {
"::" => Some(ListMarker::Colons(2)),
":::" => Some(ListMarker::Colons(3)),
"::::" => Some(ListMarker::Colons(4)),
";;" => Some(ListMarker::SemiColons),
_ => unreachable!(),
};
}
}
None
}
_ => None,
}
}
/// Whether `list_marker` finds a list marker on this line.
pub fn starts_list_item(&self) -> bool {
self.list_marker().is_some()
}
/// Whether the line starts a list item whose marker `is_description`.
pub fn starts_description_list_item(&self) -> bool {
  self
    .list_marker()
    .is_some_and(|marker| marker.is_description())
}
/// Whether this line can continue the principal text of the current list item.
///
/// Comments, empty lines and lines starting with `Plus` cannot. A line starting
/// with `OpenBracket` can unless it is a block attribute list. Any other line
/// can unless it starts a list item or is a delimiter.
pub fn continues_list_item_principle(&self) -> bool {
if self.is_comment() {
return false;
}
match self.current_token().map(|t| t.kind) {
None | Some(Plus) => false,
Some(OpenBracket) => !self.is_block_attr_list(),
_ => !self.starts_list_item() && !self.is_any_delimiter(),
}
}
/// Whether the line consists of exactly one `Plus` token.
pub fn is_list_continuation(&self) -> bool {
self.num_tokens() == 1 && self.starts(Plus)
}
/// Consumes all leading `Whitespace` tokens.
pub fn trim_leading_whitespace(&mut self) {
while self.current_is(Whitespace) {
self.discard(1);
}
}
/// If the line starts with a `Whitespace` token, re-tags that token as
/// `Discard` in place rather than removing it, so the token count is unchanged.
pub fn discard_leading_whitespace(&mut self) {
if self.current_is(Whitespace) {
self.tokens.get_mut(0).unwrap().kind = Discard;
}
}
/// Drops the first `n` bytes from the first token via
/// `Token::drop_leading_bytes`. Does nothing when `n` is 0.
///
/// # Panics
///
/// Panics if `n > 0` and the line has no tokens. In builds with debug
/// assertions it also panics if `n` exceeds the first token's lexeme length.
pub fn drop_leading_bytes(&mut self, n: u32) {
if n > 0 {
debug_assert!(n as usize <= self.current_token().unwrap().lexeme.len());
self.tokens.get_mut(0).unwrap().drop_leading_bytes(n);
}
}
/// Whether the line starts a list item whose marker `stack` reports as
/// starting a nested list.
pub fn starts_nested_list(&self, stack: &ListStack) -> bool {
  self
    .list_marker()
    .is_some_and(|marker| stack.starts_nested_list(marker))
}
/// Consumes a leading checklist marker (`[*]`, `[x]` or `[ ]`) when it is
/// followed by whitespace.
///
/// Returns whether the box is checked together with the marker's source text
/// and location. Returns `None`, consuming nothing, if the line does not start
/// with such a marker. Only the three bracket tokens are consumed; the
/// whitespace after them is left in place.
pub fn consume_checklist_item(
&mut self,
bump: &'arena Bump,
) -> Option<(bool, SourceString<'arena>)> {
// Shape required: `[`, one token, `]`, whitespace.
if !self.starts(OpenBracket) || !self.has_seq_at(&[Kind(CloseBracket), Kind(Whitespace)], 2) {
return None;
}
let inside = self.nth_token(1).unwrap();
let (src, checked) = match inside {
Token { kind: Star, .. } => ("[*]", true),
Token { kind: Whitespace, .. } => ("[ ]", false),
Token { kind: Word, lexeme, .. } if *lexeme == "x" => ("[x]", true),
_ => return None,
};
// Extend the opening bracket's location by two to cover the inner token and
// the closing bracket.
// NOTE(review): this assumes the inner token is one byte wide — confirm for
// multi-character whitespace.
let mut loc = self.first_loc().unwrap();
loc.end += 2;
self.discard(3);
let src = BumpString::from_str_in(src, bump);
Some((checked, SourceString::new(src, loc)))
}
/// Moves tokens from the front of this line into a new `Line` until this line
/// starts with `seq`; the tokens matching `seq` stay in this line.
///
/// Tokens are re-added with `push`, so the new line's flags and email merging
/// are recomputed.
///
/// # Panics
///
/// Panics if `seq` is never found (the line runs out of tokens); this includes
/// an empty `seq`, which `has_seq_at` never matches.
pub fn extract_line_before(&mut self, seq: &[TokenSpec]) -> Line<'arena> {
let mut line = Line::with_capacity(self.num_tokens(), self.tokens.bump);
while !self.starts_with_seq(seq) {
line.push(self.consume_current().unwrap());
}
line
}
/// Whether the line now holds fewer tokens than its recorded `orig_len`.
pub fn is_partially_consumed(&self) -> bool {
self.tokens.len() < self.orig_len as usize
}
/// Whether the line still holds exactly `orig_len` tokens.
pub fn is_fully_unconsumed(&self) -> bool {
self.tokens.len() == self.orig_len as usize
}
/// Whether the rest of the line could complete an attribute list attached to
/// formatted text, i.e. `...]` directly followed by a formatting token.
///
/// The first token must be neither bracket. The first `CloseBracket` directly
/// followed by `Hash`, `Star`, `Underscore`, `Backtick`, `Tilde` or `Caret`
/// must not be at index 0 and must also be the first non-escaped
/// `CloseBracket` in the line.
pub fn continues_formatted_text_attr_list(&self) -> bool {
if !self.current_satisfies(NotOneOf(&[OpenBracket, CloseBracket])) {
return false;
}
let Some(idx) = self.index_of_seq(&[
Kind(CloseBracket),
OneOf(&[Hash, Star, Underscore, Backtick, Tilde, Caret]),
]) else {
return false;
};
idx != 0 && self.first_nonescaped(CloseBracket).map(|(_, i)| i) == Some(idx)
}
/// Whether the rest of the line could complete an inline anchor.
///
/// The line must start with `OpenBracket` followed by `Word` or `Colon`, and
/// contain two consecutive `CloseBracket` tokens. The first of those two must
/// also be the first non-escaped `CloseBracket` in the line.
pub fn continues_inline_anchor(&self) -> bool {
if !self.starts_with_seq(&[Kind(OpenBracket), OneOf(&[Word, Colon])]) {
return false;
}
let Some(close_idx) = self.index_of_seq(&[Kind(CloseBracket); 2]) else {
return false;
};
let Some((_, prev_idx)) = self.first_nonescaped(CloseBracket) else {
return true;
};
prev_idx == close_idx
}
/// Trims the line for use as table cell content: for `Literal` cells, leading
/// `Newline` tokens are consumed; for every style, trailing whitespace-ish
/// tokens are removed.
pub fn trim_for_cell(&mut self, style: CellContentStyle) {
if matches!(style, CellContentStyle::Literal) {
while self.current_is(Newline) {
self.discard(1);
}
}
while self.last_token().is_whitespaceish() {
self.discard_last();
}
}
/// Blanks the lexeme of every token that directly follows an `AttrRef` token
/// and shares that `AttrRef`'s source location. The tokens themselves are kept.
pub fn remove_resolved_attr_refs(&mut self) {
// Location of the most recent `AttrRef` while still inside its run.
let mut attr_loc: Option<SourceLocation> = None;
let bump = self.tokens.bump;
for token in self.iter_mut() {
if token.kind(AttrRef) {
attr_loc = Some(token.loc);
} else if attr_loc.is_some_and(|loc| loc == token.loc) {
token.lexeme = BumpString::from_str_in("", bump);
} else {
// First token with a different location ends the run.
attr_loc = None;
}
}
}
/// Removes the first token via `Deq::remove_first`, if the line has one.
pub fn remove_first_token(&mut self) {
if !self.tokens.is_empty() {
self.tokens.remove_first();
}
}
/// Byte length of the leading `Whitespace` token, or 0 if the line is empty or
/// does not start with whitespace.
pub fn get_indentation(&self) -> usize {
self
.current_token()
.filter(|t| t.kind(Whitespace))
.map_or(0, |t| t.lexeme.len())
}
/// Adjusts the leading whitespace so that the line is indented by `indent`.
/// Does nothing for a line with no tokens.
pub fn set_indentation(&mut self, indent: usize) {
let Some(token) = self.current_token_mut() else {
return;
};
let token_len = token.len();
if token.kind(Whitespace) && token_len > indent {
// Too much indentation: shrink the whitespace token.
let delta = token_len - indent;
if delta == token_len {
// `indent` is 0: drop the whitespace token entirely.
self.tokens.remove_first();
} else {
// Advance the start of the location and pop `delta` chars off the lexeme.
token.loc.start += delta as u32;
for _ in 0..delta {
token.lexeme.pop();
}
}
} else if token.kind(Whitespace) && token_len < indent {
// Too little indentation: pad with spaces; the location is not changed.
token.lexeme.push_str(&" ".repeat(indent - token_len));
} else if !token.kind(Whitespace) && indent != 0 {
// No leading whitespace: insert a new whitespace token whose location is
// derived from the first token's via `clamp_start`.
let loc = token.loc.clamp_start();
self.tokens.slowly_push_front(Token::new(
Whitespace,
loc,
BumpString::from_str_in(&" ".repeat(indent), self.tokens.bump),
));
}
}
/// Whether the line starts with an `AttrDef` token.
pub fn is_attr_decl(&self) -> bool {
self.starts(TokenKind::AttrDef)
}
/// Whether `directive_endif_target` recognises the line as an `endif::`
/// directive.
pub fn is_directive_endif(&self) -> bool {
self.directive_endif_target().is_some()
}
/// For an `endif::` directive line, returns the text of the first capture
/// group of `regx::DIRECTIVE_ENDIF` (empty if that group did not participate).
///
/// Returns `None` if the line does not start with an `endif::` `Directive`
/// token, has fewer than three tokens, or its reassembled source does not
/// match the regex.
pub fn directive_endif_target(&self) -> Option<BumpString<'arena>> {
if !self
.current_token()
.matches(TokenKind::Directive, "endif::")
|| self.num_tokens() < 3
{
return None;
}
let src = self.reassemble_src();
regx::DIRECTIVE_ENDIF.captures(&src).map(|captures| {
let attrs = captures.get(1).map_or("", |c| c.as_str());
BumpString::from_str_in(attrs, self.tokens.bump)
})
}
}
#[cfg(test)]
mod tests {
use crate::internal::*;
use crate::token::{TokenKind::*, TokenSpec::*, *};
use test_utils::*;
// Table-driven check of `Line::set_indentation`. Each case is
// (input, requested indent, expected reassembled source, expected location of
// the first token afterwards).
#[test]
fn set_indentation() {
let cases = vec![
(" foo", 2, " foo", 2..4),
(" foo", 1, " foo", 3..4),
(" foo", 2, " foo", 0..2),
(" foo", 0, "foo", 2..5),
("foo ", 2, " foo ", 0..0),
(" foo", 4, " foo", 0..2),
("foo", 4, " foo", 0..0),
];
for (input, indent, expected, range) in cases {
let mut line = read_line!(input);
line.set_indentation(indent);
expect_eq!(line.reassemble_src(), expected, from: input);
expect_eq!(line.current_token().unwrap().loc, loc!(range), from: input);
}
}
// Plain text continues a list item's principal text; list markers, comments,
// block attribute lists, description-list terms and delimiters do not.
#[test]
fn test_continues_list_item_principle() {
let cases = vec![
("foo", true),
(" foo", true),
(" foo", true),
("* foo", false),
(" * foo", false),
("- foo", false),
("// foo", false),
("[circles]", false),
("term::", false),
("term:: desc", false),
("====", false),
];
for (input, expected) in cases {
let line = read_line!(input);
expect_eq!(line.continues_list_item_principle(), expected, from: input);
}
}
// Each case is (input line, markers already on the list stack, whether the
// line is expected to start a nested list).
#[test]
fn test_starts_nested_list() {
use ListMarker::*;
let cases: Vec<(&str, &[ListMarker], bool)> = vec![
("* foo", &[Star(1)], false),
("** foo", &[Star(1)], true),
("* foo", &[Star(2)], true),
(". foo", &[Star(2), Star(1)], true),
("2. foo", &[Digits(1)], false),
("<2> bar", &[Callout(Some(1))], false),
];
for (input, markers, expected) in cases {
let mut stack = ListStack::default();
for marker in markers {
stack.push(*marker);
}
let line = read_line!(input);
expect_eq!(line.starts_nested_list(&stack), expected, from: input);
}
}
// Table-driven check of `Line::list_marker`: each case pairs an input line with
// the marker it should produce, or `None` when the line is not a list item.
#[test]
fn test_list_marker() {
use ListMarker::*;
let cases = vec![
("* foo", Some(Star(1))),
("** foo", Some(Star(2))),
(". foo", Some(Dot(1))),
(".. foo", Some(Dot(2))),
("... foo", Some(Dot(3))),
("- foo", Some(Dash)),
("1. foo", Some(Digits(1))),
("999. foo", Some(Digits(999))),
("2. foo", Some(Digits(2))),
("--- foo", None),
("33.44. foo", None),
(":: bar", None),
("* ", None),
("** ", None),
("*** ", None),
("- - -", None), ("* * *", None), ("* * * ", None), ("* *foo*", Some(Star(1))),
("* *foo* bar", Some(Star(1))),
("* **foo**", Some(Star(1))),
("* * foo", Some(Star(1))),
(" ", None),
(". ", None),
(".. ", None),
("... ", None),
("- ", None),
("= :: bar", None),
("foo:: bar", Some(Colons(2))),
("foo::", Some(Colons(2))),
("image:: baz", Some(Colons(2))),
("image::cat.png[]", None),
("foo::: bar", Some(Colons(3))),
("foo:::: bar", Some(Colons(4))),
("foo;; bar", Some(SemiColons)),
("_foo_::", Some(Colons(2))),
("foo bar:: baz", Some(Colons(2))),
("<1> foo", Some(Callout(Some(1)))),
("<.> foo", Some(Callout(None))),
("<!--3--> foo", None), ("<255> foo", Some(Callout(Some(255)))),
("<.>", None),
];
for (input, marker) in cases {
let line = read_line!(input);
expect_eq!(line.list_marker(), marker, from: input);
}
}
// A marker must be followed by whitespace and content to start a list item;
// comment lines never do.
#[test]
fn test_starts_list_item() {
let cases = vec![
("* foo", true),
("foo", false),
("- foo", true),
("-- foo", false),
(" - foo", true),
(". foo", true),
("**** foo", true),
("1. foo", true),
("999. foo", true),
(" * foo", true),
(" * foo", true),
("* {foo}", true),
(". {foo}", true),
("*foo", false),
(".foo", false),
("-foo", false),
(" ::", false),
("//foo:: bar", false),
];
for (input, expected) in cases {
let line = read_line!(input);
expect_eq!(line.starts_list_item(), expected, from: input);
}
}
// `discard(n)` removes tokens from the front. The assertions show only the
// first source line ends up in `line`.
#[test]
fn test_discard() {
let mut line = read_line!("foo bar\nso baz\n");
expect_eq!(line.reassemble_src(), "foo bar");
expect_eq!(line.num_tokens(), 3);
line.discard(1);
expect_eq!(line.reassemble_src(), " bar");
expect_eq!(line.num_tokens(), 2);
line.discard(2);
expect_eq!(line.reassemble_src(), "");
expect_eq!(line.num_tokens(), 0);
}
// `discard_last` removes one token at a time from the end of the line.
#[test]
fn test_discard_last() {
let mut line = read_line!("'foo'");
expect_eq!(line.reassemble_src(), "'foo'");
line.discard_last();
expect_eq!(line.reassemble_src(), "'foo");
line.discard_last();
expect_eq!(line.reassemble_src(), "'");
}
// Each case is (input, specs, token offset, expected result of `has_seq_at`).
// The final assertion checks that offsets are relative to the current front of
// the line after tokens have been discarded.
#[test]
fn test_line_has_seq_at() {
let cases: Vec<(&str, &[TokenSpec], u32, bool)> = vec![
("foo bar_:", &[Kind(Word), Kind(Whitespace)], 0, true),
("foo bar_:", &[Kind(Word), Kind(Whitespace)], 1, false),
("foo bar", &[Kind(Whitespace), Kind(Word)], 1, true),
(
"foo bar_:",
&[Kind(Word), Kind(Underscore), Kind(Colon)],
2,
true,
),
(
"foo bar_:",
&[Kind(Word), Kind(Underscore), Kind(Colon)],
0,
false,
),
("#", &[Kind(Hash)], 0, true),
];
for (input, token_types, pos, expected) in cases {
let line = read_line!(input);
expect_eq!(line.has_seq_at(token_types, pos), expected);
}
let mut line = read_line!("foo_#");
line.discard(2); assert!(line.has_seq_at(&[Kind(Hash)], 0));
}
// A closing bracket preceded by a backslash does not count as a non-escaped
// ending.
#[test]
fn test_ends_nonescaped() {
let cases: Vec<(&str, TokenKind, bool)> = vec![
("x", CloseBracket, false),
("]", CloseBracket, true),
("\\]", CloseBracket, false),
("l]", CloseBracket, true),
];
for (input, token_type, expected) in cases {
let line = read_line!(input);
expect_eq!(line.ends_with_nonescaped(token_type), expected);
}
}
// Each case is (input, stop specs, expected terminating token index), run with
// `InlineCtx::None`.
#[test]
fn test_line_terminates_constrained_in() {
let cases: Vec<(&str, &[TokenSpec], Option<usize>)> = vec![
("foo_ bar", &[Kind(Underscore)], Some(1)),
("foo_bar bar", &[Kind(Underscore)], None),
("`+CB###2+`#", &[Kind(Hash)], Some(9)),
("bar__", &[Kind(Underscore)], None),
("foo##", &[Kind(Hash)], None),
];
for (input, specs, expected) in cases {
let line = read_line!(input);
expect_eq!(line.terminates_constrained_in(specs, &InlineCtx::None), expected, from: input);
}
}
// Each case is (input, specs, whether the spec sequence occurs anywhere in the
// line), including a sequence longer than the line itself.
#[test]
fn test_line_contains_seq() {
let cases: Vec<(&str, &[TokenSpec], bool)> = vec![
("_bar__r", &[Kind(Underscore), Kind(Underscore)], true),
("foo bar_:", &[Kind(Word), Kind(Whitespace)], true),
(
"foo bar_:",
&[Kind(Word), Kind(Whitespace), Kind(Word)],
true,
),
("foo bar_:", &[Kind(Word)], true),
("foo bar_:", &[Kind(Underscore), Kind(Colon)], true),
("foo bar_:", &[Kind(Underscore), Kind(Word)], false),
(
"foo bar_:",
&[Kind(Whitespace), Kind(Word), Kind(Underscore)],
true,
),
(
"foo ",
&[Kind(Word), Kind(Whitespace), Kind(Underscore), Kind(Colon)],
false,
),
];
for (input, token_types, expected) in cases {
let line = read_line!(input);
expect_eq!(line.contains_seq(token_types), expected, from: input);
}
}
// Guards against `Line` growing beyond 56 bytes.
#[test]
fn test_size_of_line() {
assert!(std::mem::size_of::<Line>() <= 56);
}
// Each case is (input, token kinds expected to appear in sequence, expected
// `orig_len` after email merging). Merged emails collapse several lexed tokens
// into one, which is why `orig_len` is checked as well.
#[test]
fn test_email_detection() {
let cases: Vec<(&str, &[TokenSpec], u32)> = vec![
("bob@bob.com", &[Kind(Email)], 1),
("bob@bob", &[Kind(Word)], 1),
("bob.lob@bob.com", &[Kind(Email)], 1),
(
"foo bob.lob@bob.com",
&[Kind(Word), Kind(Whitespace), Kind(Email)],
3,
),
(
"foo baz_bob.lob@bob.com",
&[Kind(Word), Kind(Whitespace), Kind(Email)],
3,
),
(
"foo baz+bob.lob@bob.com",
&[Kind(Word), Kind(Whitespace), Kind(Email)],
3,
),
];
for (input, token_types, expected_len) in cases {
let line = read_line!(input);
expect_eq!(line.contains_seq(token_types), true, from: input);
expect_eq!(line.orig_len, expected_len, from: input);
}
}
}