use crate::syntax::SyntaxKind;
use rowan::GreenNodeBuilder;
/// Emits `line` into `builder` as a `TEXT` token, followed by a `NEWLINE`
/// token when the line ends with a line terminator.
///
/// A trailing `"\r\n"` is recognised in preference to a bare `"\n"`, and the
/// terminator is emitted verbatim. The `TEXT` token is always emitted, even
/// when the content before the terminator is empty.
pub(crate) fn emit_line_tokens(builder: &mut GreenNodeBuilder<'static>, line: &str) {
    // Byte length of the terminator at the end of `line` (0 when there is none).
    let terminator_len = if line.ends_with("\r\n") {
        2
    } else if line.ends_with('\n') {
        1
    } else {
        0
    };

    // The terminator is ASCII, so this split always lands on a char boundary.
    let (text, terminator) = line.split_at(line.len() - terminator_len);

    builder.token(SyntaxKind::TEXT.into(), text);
    if terminator_len > 0 {
        builder.token(SyntaxKind::NEWLINE.into(), terminator);
    }
}
/// Tokenizes a separator line into fine-grained tokens on `builder`.
///
/// The line terminator (if any) is split off first and emitted last as a
/// `NEWLINE` token. The remaining content is scanned left to right:
///
/// * each `|` or `+` becomes its own `TABLE_SEP_DELIM` token,
/// * each `:` becomes its own `TABLE_SEP_COLON` token,
/// * a maximal run of `-` becomes one `TABLE_SEP_DASHES` token,
/// * a maximal run of `=` becomes one `TABLE_SEP_EQUALS` token,
/// * a maximal run of spaces/tabs becomes one `TABLE_SEP_WHITESPACE` token,
/// * a maximal run of anything else becomes one `TEXT` token.
///
/// Every byte of the input ends up in exactly one token, so concatenating the
/// token texts reproduces `line`.
pub(crate) fn emit_separator_tokens(builder: &mut GreenNodeBuilder<'static>, line: &str) {
    /// Index one past the run of bytes starting at `from` that satisfy `in_run`.
    fn run_end(bytes: &[u8], from: usize, in_run: impl Fn(u8) -> bool) -> usize {
        bytes[from..]
            .iter()
            .position(|&b| !in_run(b))
            .map_or(bytes.len(), |offset| from + offset)
    }

    let (content, newline) = strip_newline(line);
    let bytes = content.as_bytes();

    let mut pos = 0;
    while let Some(&first) = bytes.get(pos) {
        // Decide the token kind from the first byte and find where the token ends.
        let (kind, end) = match first {
            b'|' | b'+' => (SyntaxKind::TABLE_SEP_DELIM, pos + 1),
            b':' => (SyntaxKind::TABLE_SEP_COLON, pos + 1),
            b'-' => (
                SyntaxKind::TABLE_SEP_DASHES,
                run_end(bytes, pos, |b| b == b'-'),
            ),
            b'=' => (
                SyntaxKind::TABLE_SEP_EQUALS,
                run_end(bytes, pos, |b| b == b'='),
            ),
            b' ' | b'\t' => (
                SyntaxKind::TABLE_SEP_WHITESPACE,
                run_end(bytes, pos, |b| b == b' ' || b == b'\t'),
            ),
            // Anything unrecognised is grouped up to the next recognised byte.
            // All recognised bytes are ASCII, so the run ends on a char boundary
            // even when it contains multi-byte characters.
            _ => (
                SyntaxKind::TEXT,
                run_end(bytes, pos, |b| {
                    !matches!(b, b'|' | b'+' | b':' | b'-' | b'=' | b' ' | b'\t')
                }),
            ),
        };

        builder.token(kind.into(), &content[pos..end]);
        pos = end;
    }

    if !newline.is_empty() {
        builder.token(SyntaxKind::NEWLINE.into(), newline);
    }
}
/// Removes at most `max_spaces` leading space characters from `line`.
///
/// Only the ASCII space `' '` is stripped; tabs and other whitespace are left
/// untouched. Spaces beyond the first `max_spaces` are kept.
pub(crate) fn strip_leading_spaces_n(line: &str, max_spaces: usize) -> &str {
    // Number of leading spaces; each one is a single byte, so this is also a byte offset.
    let leading = line.len() - line.trim_start_matches(' ').len();
    let cut = leading.min(max_spaces);
    &line[cut..]
}
/// Removes up to three leading spaces from `line`.
///
/// Shorthand for [`strip_leading_spaces_n`] with a limit of 3.
pub(crate) fn strip_leading_spaces(line: &str) -> &str {
    const MAX_STRIPPED_SPACES: usize = 3;
    strip_leading_spaces_n(line, MAX_STRIPPED_SPACES)
}
/// Splits `line` into `(content, terminator)`.
///
/// `terminator` is `"\r\n"` or `"\n"` when `line` ends with one of them
/// (`"\r\n"` takes precedence), and `""` otherwise. Only a single trailing
/// terminator is split off; a lone trailing `\r` is treated as content.
pub(crate) fn strip_newline(line: &str) -> (&str, &str) {
    let terminator_len = if line.ends_with("\r\n") {
        2
    } else if line.ends_with('\n') {
        1
    } else {
        0
    };
    // The terminator is ASCII, so the split point is always a char boundary.
    line.split_at(line.len() - terminator_len)
}
/// Returns `s` with every trailing `\n` and `\r` character removed.
///
/// All trailing line-ending characters are stripped, in any number and any
/// order (for example `"foo\n\r\n"` becomes `"foo"`), not just one terminator.
/// Newlines in the interior of `s` are left alone.
#[inline]
pub(crate) fn trim_end_newlines(s: &str) -> &str {
    // The standard library trim yields the same slice as a manual byte scan
    // and needs no `unsafe` to rebuild the `&str`.
    s.trim_end_matches(|c: char| c == '\n' || c == '\r')
}
/// Returns `s` with all leading spaces and tabs removed.
///
/// Only `' '` and `'\t'` are stripped; newlines and other Unicode whitespace
/// at the start of `s` are preserved.
#[inline]
pub(crate) fn trim_start_spaces_tabs(s: &str) -> &str {
    // The standard library trim yields the same slice as a manual byte scan
    // and needs no `unsafe` to rebuild the `&str`.
    s.trim_start_matches(|c: char| c == ' ' || c == '\t')
}
/// Reports whether `s` consists solely of spaces, tabs, `\n` and `\r`.
///
/// The empty string counts as blank. Other Unicode whitespace (such as a
/// non-breaking space) makes the line non-blank.
#[inline]
pub(crate) fn is_blank_line(s: &str) -> bool {
    s.bytes()
        .all(|byte| matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
}
/// Returns `s` with all trailing spaces and tabs removed.
///
/// Only `' '` and `'\t'` are stripped; a trailing newline (and anything before
/// it) is preserved.
#[inline]
pub(crate) fn trim_end_spaces_tabs(s: &str) -> &str {
    // The standard library trim yields the same slice as a manual byte scan
    // and needs no `unsafe` to rebuild the `&str`.
    s.trim_end_matches(|c: char| c == ' ' || c == '\t')
}
/// Splits `input` into lines, keeping each line's terminator attached.
///
/// A line ends at every `\n`; a preceding `\r` (as in `"\r\n"`) stays part of
/// the same line. A lone `\r` is not a line break. A final line without a
/// terminator is returned as-is, and empty input yields an empty vector.
/// Concatenating the returned slices reproduces `input` exactly.
pub(crate) fn split_lines_inclusive(input: &str) -> Vec<&str> {
    // Every "\r\n" already ends in '\n', so splitting after each '\n' covers
    // both terminator styles without special-casing CRLF.
    input.split_inclusive('\n').collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// At most `max_spaces` leading spaces are removed; a fourth space survives.
    #[test]
    fn test_strip_leading_spaces_n() {
        // One, two and three leading spaces are all fully stripped with a limit of 3.
        assert_eq!(strip_leading_spaces_n(" text", 3), "text");
        assert_eq!(strip_leading_spaces_n("  text", 3), "text");
        assert_eq!(strip_leading_spaces_n("   text", 3), "text");
        assert_eq!(strip_leading_spaces_n("text", 3), "text");
        // Four leading spaces: only three are stripped, one remains.
        assert_eq!(strip_leading_spaces_n("    text", 3), " text");
    }

    /// The terminator is split off verbatim, or is empty when absent.
    #[test]
    fn test_strip_newline() {
        assert_eq!(strip_newline("text\n"), ("text", "\n"));
        assert_eq!(strip_newline("text\r\n"), ("text", "\r\n"));
        assert_eq!(strip_newline("text"), ("text", ""));
    }

    /// All trailing `\n` / `\r` characters are removed, including after non-ASCII text.
    #[test]
    fn test_trim_end_newlines() {
        assert_eq!(trim_end_newlines("foo\n"), "foo");
        assert_eq!(trim_end_newlines("foo\r\n"), "foo");
        assert_eq!(trim_end_newlines("foo\n\n"), "foo");
        assert_eq!(trim_end_newlines("foo"), "foo");
        assert_eq!(trim_end_newlines(""), "");
        assert_eq!(trim_end_newlines("\n"), "");
        assert_eq!(trim_end_newlines("föö\n"), "föö");
    }

    /// Runs `emit_separator_tokens` on `line` inside a `TABLE_SEPARATOR` node
    /// and returns the resulting `(kind, text)` pairs in order.
    fn separator_tokens(line: &str) -> Vec<(SyntaxKind, String)> {
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::TABLE_SEPARATOR.into());
        emit_separator_tokens(&mut builder, line);
        builder.finish_node();
        let node = crate::syntax::SyntaxNode::new_root(builder.finish());
        node.children_with_tokens()
            .filter_map(|el| el.into_token())
            .map(|t| (t.kind(), t.text().to_string()))
            .collect()
    }

    /// Concatenating the emitted token texts must reproduce the input line.
    #[test]
    fn test_emit_separator_tokens_reconstruction() {
        for line in [
            "|:--|--:|:-:|\n",
            "+------+:----:+------+\n",
            "+======+======+\r\n",
            "------- ------ ----------\n",
            // No trailing newline.
            ":--:",
            // Unrecognised characters fall back to TEXT tokens.
            "|:--|--:|?weird|\n",
        ] {
            let reconstructed: String = separator_tokens(line)
                .iter()
                .map(|(_, t)| t.as_str())
                .collect();
            assert_eq!(reconstructed, line, "round-trip failed for {line:?}");
        }
    }

    /// Each separator character class maps to its dedicated token kind.
    #[test]
    fn test_emit_separator_tokens_kinds() {
        use SyntaxKind::*;
        assert_eq!(
            separator_tokens("|:--|--:|\n"),
            vec![
                (TABLE_SEP_DELIM, "|".to_string()),
                (TABLE_SEP_COLON, ":".to_string()),
                (TABLE_SEP_DASHES, "--".to_string()),
                (TABLE_SEP_DELIM, "|".to_string()),
                (TABLE_SEP_DASHES, "--".to_string()),
                (TABLE_SEP_COLON, ":".to_string()),
                (TABLE_SEP_DELIM, "|".to_string()),
                (NEWLINE, "\n".to_string()),
            ],
        );
        assert_eq!(
            separator_tokens("--- ---\n"),
            vec![
                (TABLE_SEP_DASHES, "---".to_string()),
                (TABLE_SEP_WHITESPACE, " ".to_string()),
                (TABLE_SEP_DASHES, "---".to_string()),
                (NEWLINE, "\n".to_string()),
            ],
        );
    }

    /// Only spaces and tabs are trimmed, from the requested side.
    #[test]
    fn test_trim_spaces_tabs() {
        assert_eq!(trim_start_spaces_tabs(" \tfoo"), "foo");
        assert_eq!(trim_start_spaces_tabs("foo"), "foo");
        assert_eq!(trim_start_spaces_tabs(""), "");
        assert_eq!(trim_end_spaces_tabs("foo \t"), "foo");
        assert_eq!(trim_end_spaces_tabs("foo"), "foo");
        assert_eq!(trim_end_spaces_tabs(""), "");
        assert_eq!(trim_end_spaces_tabs("föö "), "föö");
    }
}