use std::ops::Range;
/// Half-open byte range (`start..end`) into the scanned input, as produced by the
/// `scan_*` functions in this module.
pub(crate) type SkipRange = Range<usize>;
/// Returns the byte ranges of `input` covered by Markdown constructs recognised by
/// this module: indented code blocks, fenced code blocks, reference definitions,
/// code spans, autolinks and links.
///
/// Ranges are emitted in document order and never overlap, because the scanner
/// always resumes after the end of the construct it just recorded.
///
/// A backtick run that does not open a code span is skipped as a whole. Retrying
/// from inside the run would pair a shorter suffix of it with a later run (for
/// example the last two backticks of "```x``"), which CommonMark treats as
/// literal text.
pub(crate) fn scan_skip_regions(input: &str) -> Vec<SkipRange> {
    let bytes = input.as_bytes();
    let mut out: Vec<SkipRange> = Vec::new();
    let mut idx = 0;
    let mut at_line_start = true;

    while idx < bytes.len() {
        let rest = &input[idx..];

        if at_line_start {
            // Block-level constructs are only tried at the start of a line, in this
            // fixed order of precedence.
            let block = take_indented_code_block(input, idx)
                .or_else(|| take_fenced_code_block(rest))
                .or_else(|| take_reference_definition(rest));
            if let Some(len) = block {
                out.push(idx..idx + len);
                idx += len;
                // Every block helper consumes whole lines, so the scanner is still
                // at a line start and `at_line_start` stays `true`.
                continue;
            }
        }

        let inline = match bytes[idx] {
            b'\n' => {
                idx += 1;
                at_line_start = true;
                continue;
            }
            b'`' => match take_code_span(rest) {
                Some(len) => Some(len),
                None => {
                    // Unmatched backtick string: literal text, skip the entire run.
                    idx += bytes[idx..].iter().take_while(|&&b| b == b'`').count();
                    at_line_start = false;
                    continue;
                }
            },
            b'<' => take_autolink(rest),
            b'[' => take_link(rest),
            _ => None,
        };

        match inline {
            Some(len) => {
                out.push(idx..idx + len);
                idx += len;
            }
            // Ordinary text: step over one whole UTF-8 character so `idx` stays on
            // a character boundary for the next `&input[idx..]` slice.
            None => idx += utf8_char_len(bytes, idx),
        }
        at_line_start = false;
    }

    out
}
/// Returns the byte ranges of `input` that are code: indented code blocks and
/// fenced code blocks, plus inline code spans when `include_code_spans` is `true`.
///
/// Code spans are always stepped over, even when they are not recorded, so their
/// contents are never scanned as ordinary text.
///
/// A backtick run that does not open a code span is skipped as a whole. Retrying
/// from inside the run would pair a shorter suffix of it with a later run (for
/// example the last two backticks of "```x``"), which CommonMark treats as
/// literal text.
pub(crate) fn scan_code_regions(input: &str, include_code_spans: bool) -> Vec<SkipRange> {
    let bytes = input.as_bytes();
    let mut out: Vec<SkipRange> = Vec::new();
    let mut idx = 0;
    let mut at_line_start = true;

    while idx < bytes.len() {
        let rest = &input[idx..];

        if at_line_start {
            // Code blocks are only tried at the start of a line.
            let block =
                take_indented_code_block(input, idx).or_else(|| take_fenced_code_block(rest));
            if let Some(len) = block {
                out.push(idx..idx + len);
                idx += len;
                // Block helpers consume whole lines; `at_line_start` stays `true`.
                continue;
            }
        }

        let byte = bytes[idx];
        if byte == b'\n' {
            idx += 1;
            at_line_start = true;
            continue;
        }

        if byte == b'`' {
            match take_code_span(rest) {
                Some(len) => {
                    if include_code_spans {
                        out.push(idx..idx + len);
                    }
                    idx += len;
                }
                // Unmatched backtick string: literal text, skip the entire run.
                None => idx += bytes[idx..].iter().take_while(|&&b| b == b'`').count(),
            }
        } else {
            // Step over one whole UTF-8 character to stay on a char boundary.
            idx += utf8_char_len(bytes, idx);
        }
        at_line_start = false;
    }

    out
}
/// Returns the byte ranges of the inline code spans found in `input`.
///
/// Indented code blocks, fenced code blocks, reference definitions, autolinks and
/// links are stepped over without being recorded, so backticks inside them are
/// never reported.
///
/// A backtick run that does not open a code span is skipped as a whole. Retrying
/// from inside the run would pair a shorter suffix of it with a later run (for
/// example the last two backticks of "```x``"), which CommonMark treats as
/// literal text.
pub(crate) fn scan_code_span_candidates(input: &str) -> Vec<SkipRange> {
    let bytes = input.as_bytes();
    let mut out: Vec<SkipRange> = Vec::new();
    let mut idx = 0;
    let mut at_line_start = true;

    while idx < bytes.len() {
        let rest = &input[idx..];

        if at_line_start {
            // Block-level constructs are only tried at the start of a line; they
            // are skipped here rather than recorded.
            let block = take_indented_code_block(input, idx)
                .or_else(|| take_fenced_code_block(rest))
                .or_else(|| take_reference_definition(rest));
            if let Some(len) = block {
                idx += len;
                // Block helpers consume whole lines; `at_line_start` stays `true`.
                continue;
            }
        }

        let step = match bytes[idx] {
            b'\n' => {
                idx += 1;
                at_line_start = true;
                continue;
            }
            b'`' => match take_code_span(rest) {
                Some(len) => {
                    out.push(idx..idx + len);
                    len
                }
                // Unmatched backtick string: literal text, skip the entire run.
                None => bytes[idx..].iter().take_while(|&&b| b == b'`').count(),
            },
            // Autolinks and links are skipped, not recorded; when the helper
            // rejects the text, just step over the single delimiter character.
            b'<' => take_autolink(rest).unwrap_or_else(|| utf8_char_len(bytes, idx)),
            b'[' => take_link(rest).unwrap_or_else(|| utf8_char_len(bytes, idx)),
            _ => utf8_char_len(bytes, idx),
        };
        idx += step;
        at_line_start = false;
    }

    out
}
/// Returns the length in bytes (1 to 4) of the UTF-8 sequence whose lead byte is
/// `bytes[idx]`, judged from that byte alone.
///
/// Continuation bytes (`0x80..=0xBF`) are reported as length 1, the same as ASCII.
///
/// # Panics
///
/// Panics if `idx` is out of bounds for `bytes`.
pub(crate) fn utf8_char_len(bytes: &[u8], idx: usize) -> usize {
    match bytes[idx] {
        0x00..=0xBF => 1,
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xFF => 4,
    }
}
/// Tries to read an inline code span at the very start of `input`.
///
/// The span opens with a run of one or more backticks and closes at the first
/// later backtick run of exactly the same length. Returns the total byte length of
/// the span including both delimiters, or `None` when `input` does not start with
/// a backtick or no matching closing run exists.
///
/// A code span may continue across a single line break but never across a blank
/// line (a line holding only spaces or tabs): a blank line ends the paragraph, so
/// the opening run cannot pair with backticks in a later paragraph.
fn take_code_span(input: &str) -> Option<usize> {
    let bytes = input.as_bytes();
    let open_len = bytes.iter().take_while(|&&b| b == b'`').count();
    if open_len == 0 {
        return None;
    }

    let mut index = open_len;
    while index < bytes.len() {
        match bytes[index] {
            b'`' => {
                let run = bytes[index..].iter().take_while(|&&b| b == b'`').count();
                if run == open_len {
                    return Some(index + run);
                }
                // A run of a different length is part of the span's content.
                index += run;
            }
            b'\n' => {
                // The next line is blank when the first byte after this newline
                // that is not a space or tab is another newline.
                let first_after_padding = bytes[index + 1..]
                    .iter()
                    .find(|&&b| b != b' ' && b != b'\t');
                if first_after_padding == Some(&b'\n') {
                    return None;
                }
                index += 1;
            }
            _ => index += 1,
        }
    }
    None
}
/// Tries to read a fenced code block starting at the beginning of `input`, which
/// must be the start of a line.
///
/// The opening fence is at most three spaces of indentation followed by three or
/// more backticks or tildes; the rest of that line is the info string. The block
/// runs until a closing fence: a line with at most three spaces of indentation, a
/// run of the same fence character at least as long as the opening run, and
/// nothing but spaces or tabs after it. An unclosed block extends to the end of
/// `input`.
///
/// Returns the byte length of the block, including the newline that ends the
/// closing fence when present, or `None` when `input` does not open a fence.
///
/// The info string of a backtick fence may not contain a backtick. A line such as
/// "```inline``` text" is inline code, and accepting it as a fence opener would
/// swallow the rest of the document.
fn take_fenced_code_block(input: &str) -> Option<usize> {
    let bytes = input.as_bytes();
    // Byte offset just past the line that starts at `from`.
    let line_end_from = |from: usize| {
        bytes[from..]
            .iter()
            .position(|&b| b == b'\n')
            .map_or(bytes.len(), |off| from + off)
    };

    // Four or more leading spaces make this an indented code line, not a fence.
    let fence_start = bytes.iter().take_while(|&&b| b == b' ').count();
    if fence_start >= 4 {
        return None;
    }
    let fence_char = *bytes.get(fence_start)?;
    if fence_char != b'`' && fence_char != b'~' {
        return None;
    }
    let open_len = bytes[fence_start..]
        .iter()
        .take_while(|&&b| b == fence_char)
        .count();
    if open_len < 3 {
        return None;
    }

    let info_start = fence_start + open_len;
    let info_end = line_end_from(info_start);
    if fence_char == b'`' && bytes[info_start..info_end].contains(&b'`') {
        return None;
    }

    // Walk the following lines looking for the closing fence.
    let mut line_start = (info_end + 1).min(bytes.len());
    while line_start < bytes.len() {
        let line_end = line_end_from(line_start);
        let line = &bytes[line_start..line_end];
        let indent = line.iter().take(3).take_while(|&&b| b == b' ').count();
        let run = line[indent..]
            .iter()
            .take_while(|&&b| b == fence_char)
            .count();
        let only_padding_after = line[indent + run..]
            .iter()
            .all(|&b| b == b' ' || b == b'\t');
        if run >= open_len && only_padding_after {
            // Include the closing fence's newline when there is one.
            return Some((line_end + 1).min(bytes.len()));
        }
        line_start = (line_end + 1).min(bytes.len());
    }

    Some(bytes.len())
}
/// Tries to read an indented code block that begins at byte offset `idx` of `input`.
///
/// `idx` must be the start of a line: either 0, or directly after a newline whose
/// preceding line is empty or holds only spaces and tabs. A code line is one
/// indented by at least four columns, where a tab advances to the next multiple of
/// four. After the first code line, lines holding only spaces or tabs do not end
/// the block.
///
/// Returns the byte length of the block measured from `idx`, or `None` when no
/// code line starts there. Blank lines that follow the last code line are included
/// in the returned length.
fn take_indented_code_block(input: &str, idx: usize) -> Option<usize> {
    let bytes = input.as_bytes();

    if idx > 0 {
        let prev_end = idx - 1;
        if bytes[prev_end] != b'\n' {
            return None;
        }
        // The line before `idx` must be blank, otherwise the indented text
        // belongs to the preceding content.
        let prev_start = bytes[..prev_end]
            .iter()
            .rposition(|&b| b == b'\n')
            .map_or(0, |pos| pos + 1);
        let prev_is_blank = bytes[prev_start..prev_end]
            .iter()
            .all(|&b| matches!(b, b' ' | b'\t'));
        if !prev_is_blank {
            return None;
        }
    }

    let mut cursor = idx;
    let mut saw_code_line = false;
    loop {
        let line_start = cursor;

        // Measure indentation, stopping once four columns have been reached.
        let mut width = 0;
        while cursor < bytes.len() && width < 4 {
            match bytes[cursor] {
                b' ' => width += 1,
                b'\t' => width += 4 - width % 4,
                _ => break,
            }
            cursor += 1;
        }

        if width >= 4 {
            // A code line: consume it through its newline, or to the end of input.
            cursor = match bytes[cursor..].iter().position(|&b| b == b'\n') {
                Some(off) => cursor + off + 1,
                None => bytes.len(),
            };
            saw_code_line = true;
            if cursor >= bytes.len() {
                return Some(cursor - idx);
            }
            continue;
        }

        if !saw_code_line {
            return None;
        }

        // An under-indented line: a blank one keeps the block open, anything
        // else ends it at the start of this line.
        let padding = bytes[line_start..]
            .iter()
            .take_while(|&&b| matches!(b, b' ' | b'\t'))
            .count();
        let after_padding = line_start + padding;
        if bytes.get(after_padding) == Some(&b'\n') {
            cursor = after_padding + 1;
            continue;
        }
        return Some(line_start - idx);
    }
}
/// Tries to read a reference-definition line (`[label]: ...`) at the start of `input`.
///
/// Accepts up to three leading spaces, then `[`, a non-empty label that contains
/// neither `]` nor a newline, `]`, and an immediately following `:`. Everything up
/// to and including the end of that line is consumed.
///
/// Returns the number of bytes consumed, or `None` when the line does not have
/// this shape.
fn take_reference_definition(input: &str) -> Option<usize> {
    let bytes = input.as_bytes();

    let indent = bytes.iter().take(3).take_while(|&&b| b == b' ').count();
    if bytes.get(indent) != Some(&b'[') {
        return None;
    }

    let label_start = indent + 1;
    let label_len = bytes[label_start..]
        .iter()
        .take_while(|&&b| b != b']' && b != b'\n')
        .count();
    let label_end = label_start + label_len;
    if label_len == 0 || bytes.get(label_end) != Some(&b']') {
        return None;
    }
    if bytes.get(label_end + 1) != Some(&b':') {
        return None;
    }

    // Swallow the remainder of the line, newline included when present.
    let tail_start = label_end + 2;
    let consumed = match bytes[tail_start..].iter().position(|&b| b == b'\n') {
        Some(off) => tail_start + off + 1,
        None => bytes.len(),
    };
    Some(consumed)
}
/// Tries to read an autolink (`<...>`) at the start of `input`.
///
/// The text between the angle brackets must be non-empty, must not contain a
/// space, tab, newline or another `<`, and must be accepted by
/// `looks_like_uri_or_email`.
///
/// Returns the byte length of the autolink including both angle brackets, or
/// `None` otherwise.
fn take_autolink(input: &str) -> Option<usize> {
    let after_open = input.strip_prefix('<')?;

    // The first delimiter byte decides the outcome: only `>` can close the link.
    let stop = after_open
        .bytes()
        .position(|b| matches!(b, b'>' | b' ' | b'\t' | b'\n' | b'<'))?;
    if stop == 0 || after_open.as_bytes()[stop] != b'>' {
        return None;
    }

    if looks_like_uri_or_email(&after_open[..stop]) {
        // Body length plus the two angle brackets.
        Some(stop + 2)
    } else {
        None
    }
}
/// Heuristic check for whether `body` is shaped like a URI or an e-mail address.
///
/// It counts as a URI when it starts with an ASCII letter, continues with any
/// number of ASCII letters, digits, `+`, `-` or `.`, then has a `:` followed by at
/// least one more byte. It counts as an e-mail address when its first `@` is
/// neither the first nor the last byte.
fn looks_like_uri_or_email(body: &str) -> bool {
    let bytes = body.as_bytes();

    let letters = bytes
        .iter()
        .take_while(|b| b.is_ascii_alphabetic())
        .count();
    if letters > 0 {
        let scheme_len = letters
            + bytes[letters..]
                .iter()
                .take_while(|&&b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.'))
                .count();
        let has_text_after_colon = scheme_len + 1 < bytes.len();
        if bytes.get(scheme_len) == Some(&b':') && has_text_after_colon {
            return true;
        }
    }

    matches!(body.find('@'), Some(at) if at > 0 && at + 1 < body.len())
}
/// Tries to read a link-like construct that starts with `[` at the start of `input`.
///
/// The bracketed text may contain nested brackets, backslash escapes and code
/// spans (whose contents are not inspected for brackets). It may continue across
/// single line breaks but not across two consecutive newlines. After the closing
/// `]` one optional target is consumed:
///
/// * `( ... )` with balanced parentheses on the same line, or
/// * `[ ... ]` closed on the same line.
///
/// When the target is missing or malformed, only the bracketed text is taken.
///
/// Returns the number of bytes consumed, or `None` when `input` does not start
/// with `[`, the brackets are unbalanced, or a blank line interrupts the text.
///
/// A backtick run inside the bracketed text that does not open a code span is
/// skipped as a whole. Retrying from inside the run would let a shorter suffix of
/// it pair with a later run and hide a `]` that should close the link text.
fn take_link(input: &str) -> Option<usize> {
    let bytes = input.as_bytes();
    if bytes.first() != Some(&b'[') {
        return None;
    }

    // Find the `]` that balances the opening bracket.
    let mut index = 1;
    let mut depth: i32 = 1;
    while index < bytes.len() && depth > 0 {
        match bytes[index] {
            // Skip the escaped byte as well, so `\]` and `\[` do not count.
            b'\\' => index += 2,
            b'[' => {
                depth += 1;
                index += 1;
            }
            b']' => {
                depth -= 1;
                index += 1;
            }
            b'`' => {
                index += match take_code_span(&input[index..]) {
                    Some(len) => len,
                    // Unmatched backtick string: literal text, skip the whole run.
                    None => bytes[index..].iter().take_while(|&&b| b == b'`').count(),
                };
            }
            b'\n' => {
                if bytes.get(index + 1) == Some(&b'\n') {
                    return None;
                }
                index += 1;
            }
            _ => index += 1,
        }
    }
    if depth != 0 {
        return None;
    }

    match bytes.get(index).copied() {
        Some(b'(') => {
            // Inline target: scan to the balancing `)` on the same line.
            let mut end = index + 1;
            let mut paren_depth: i32 = 1;
            while end < bytes.len() && paren_depth > 0 {
                match bytes[end] {
                    b'\\' => {
                        end += 2;
                        continue;
                    }
                    b'(' => paren_depth += 1,
                    b')' => paren_depth -= 1,
                    // A line break inside the target: keep only the bracketed text.
                    b'\n' => return Some(index),
                    _ => {}
                }
                end += 1;
            }
            Some(if paren_depth == 0 { end } else { index })
        }
        Some(b'[') => {
            // Reference label: must close on the same line.
            let label_start = index + 1;
            let label_len = bytes[label_start..]
                .iter()
                .take_while(|&&b| b != b']' && b != b'\n')
                .count();
            let label_end = label_start + label_len;
            if bytes.get(label_end) == Some(&b']') {
                Some(label_end + 1)
            } else {
                Some(index)
            }
        }
        _ => Some(index),
    }
}
/// Reports whether byte offset `pos` lies inside any of the half-open ranges in `skips`.
///
/// A range's `start` is inside it and its `end` is not; empty ranges contain nothing.
pub(crate) fn position_in_skip(skips: &[SkipRange], pos: usize) -> bool {
    skips.iter().any(|range| range.contains(&pos))
}
// Unit tests for this module live in the separate `tests` submodule and are only
// compiled for test builds.
#[cfg(test)]
mod tests;