use crate::lex::token::line::{LineToken, LineType};
/// Inserts a synthetic `LineType::DocumentStart` token into a stream of line tokens.
///
/// The marker is placed after any leading run of `DataMarkerLine` tokens (together
/// with their indented bodies and the blank lines that separate them) and before
/// the first token that is treated as document content.
pub struct DocumentStartMarker;

impl DocumentStartMarker {
    /// Creates the marker value.
    ///
    /// The struct has no fields, and [`mark`](Self::mark) is an associated
    /// function, so an instance is not required to use it.
    pub fn new() -> Self {
        Self
    }

    /// Returns `line_tokens` with exactly one synthetic `DocumentStart` token inserted.
    ///
    /// All input tokens are kept, in their original order. An empty input yields a
    /// vector that contains only the synthetic token.
    pub fn mark(line_tokens: Vec<LineToken>) -> Vec<LineToken> {
        // For an empty slice this is 0, so no separate empty-input branch is needed.
        let insert_pos = Self::find_content_start(&line_tokens);

        // The vector is already owned: insert in place instead of cloning every
        // token (and its inner vectors) into a second vector.
        let mut result = line_tokens;
        result.insert(insert_pos, Self::synthetic_document_start());
        result
    }

    /// Finds the index at which the `DocumentStart` token has to be inserted.
    ///
    /// Scanning from the front, the following tokens are skipped:
    /// * `Indent` / `Dedent` tokens, which raise / lower a nesting counter;
    /// * every token seen while the nesting counter is above zero;
    /// * `DataMarkerLine` tokens at nesting depth zero;
    /// * a run of `BlankLine` tokens at depth zero, but only when the first
    ///   non-blank token after the run is a `DataMarkerLine`.
    ///
    /// The scan stops at the first other token at depth zero, or at a blank run
    /// that is not followed by a `DataMarkerLine` (those blank lines end up after
    /// the marker). If the scan never stops, `tokens.len()` is returned.
    fn find_content_start(tokens: &[LineToken]) -> usize {
        let mut pos = 0;
        let mut indent_depth: usize = 0;

        while pos < tokens.len() {
            let line_type = tokens[pos].line_type;
            match line_type {
                LineType::Indent => {
                    indent_depth += 1;
                    pos += 1;
                }
                LineType::Dedent => {
                    // Saturating: an unmatched `Dedent` must not underflow the counter.
                    indent_depth = indent_depth.saturating_sub(1);
                    pos += 1;
                }
                LineType::DataMarkerLine if indent_depth == 0 => {
                    pos += 1;
                }
                LineType::BlankLine if indent_depth == 0 => {
                    // `tokens[pos]` is itself blank, so the first non-blank token
                    // found from `pos` onwards lies strictly after `pos`.
                    let after_blanks = tokens[pos..]
                        .iter()
                        .position(|token| token.line_type != LineType::BlankLine)
                        .map(|offset| pos + offset);

                    match after_blanks {
                        // The blank run only separates two data-marker lines: skip it.
                        Some(next) if tokens[next].line_type == LineType::DataMarkerLine => {
                            pos = next;
                        }
                        // The blank run leads into content (or the end of input).
                        _ => break,
                    }
                }
                // Anything nested inside an indented body is skipped.
                _ if indent_depth > 0 => {
                    pos += 1;
                }
                // First other token at depth zero: the marker goes here.
                _ => {
                    break;
                }
            }
        }

        pos
    }

    /// Builds the synthetic `DocumentStart` token; it has no source tokens and no spans.
    fn synthetic_document_start() -> LineToken {
        LineToken {
            source_tokens: vec![],
            token_spans: vec![],
            line_type: LineType::DocumentStart,
        }
    }
}
impl Default for DocumentStartMarker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::token::Token;

    /// Builds a line of the requested type that carries one dummy text token.
    // A single-element `vec![0..4]` is intended here: one span for the one token.
    #[allow(clippy::single_range_in_vec_init)]
    fn line(line_type: LineType) -> LineToken {
        LineToken {
            source_tokens: vec![Token::Text(String::from("test"))],
            token_spans: vec![0..4],
            line_type,
        }
    }

    /// Builds a blank line holding a single newline token.
    // A single-element `vec![0..1]` is intended here: one span for the one token.
    #[allow(clippy::single_range_in_vec_init)]
    fn blank() -> LineToken {
        LineToken {
            source_tokens: vec![Token::BlankLine(Some(String::from("\n")))],
            token_spans: vec![0..1],
            line_type: LineType::BlankLine,
        }
    }

    /// Builds a token without source tokens or spans (used for `Indent` / `Dedent`).
    fn structural(line_type: LineType) -> LineToken {
        LineToken {
            source_tokens: Vec::new(),
            token_spans: Vec::new(),
            line_type,
        }
    }

    /// One data-marker line followed by an indented single-paragraph body.
    fn annotation() -> Vec<LineToken> {
        vec![
            line(LineType::DataMarkerLine),
            structural(LineType::Indent),
            line(LineType::ParagraphLine),
            structural(LineType::Dedent),
        ]
    }

    /// Projects a token stream onto its sequence of line types.
    fn kinds(tokens: &[LineToken]) -> Vec<LineType> {
        tokens.iter().map(|token| token.line_type).collect()
    }

    #[test]
    fn test_empty_document() {
        let marked = DocumentStartMarker::mark(Vec::new());

        assert_eq!(kinds(&marked), [LineType::DocumentStart]);
    }

    #[test]
    fn test_no_annotations() {
        let input = vec![
            line(LineType::ParagraphLine),
            blank(),
            line(LineType::ParagraphLine),
        ];

        let marked = DocumentStartMarker::mark(input);

        assert_eq!(
            kinds(&marked),
            [
                LineType::DocumentStart,
                LineType::ParagraphLine,
                LineType::BlankLine,
                LineType::ParagraphLine,
            ]
        );
    }

    #[test]
    fn test_single_annotation_then_content() {
        let mut input = annotation();
        input.push(line(LineType::ParagraphLine));

        let marked = DocumentStartMarker::mark(input);

        assert_eq!(
            kinds(&marked),
            [
                LineType::DataMarkerLine,
                LineType::Indent,
                LineType::ParagraphLine,
                LineType::Dedent,
                LineType::DocumentStart,
                LineType::ParagraphLine,
            ]
        );
    }

    #[test]
    fn test_multiple_annotations() {
        let mut input = annotation();
        input.push(blank());
        input.extend(annotation());
        input.push(line(LineType::ParagraphLine));

        let marked = kinds(&DocumentStartMarker::mark(input));

        assert_eq!(marked.len(), 11);
        assert_eq!(
            marked[9..],
            [LineType::DocumentStart, LineType::ParagraphLine]
        );
    }

    #[test]
    fn test_blank_lines_before_content() {
        let mut input = annotation();
        input.push(blank());
        input.push(line(LineType::ParagraphLine));

        let marked = kinds(&DocumentStartMarker::mark(input));

        assert_eq!(marked.len(), 7);
        assert_eq!(
            marked[4..],
            [
                LineType::DocumentStart,
                LineType::BlankLine,
                LineType::ParagraphLine,
            ]
        );
    }

    #[test]
    fn test_only_annotations() {
        let marked = kinds(&DocumentStartMarker::mark(annotation()));

        assert_eq!(marked.len(), 5);
        assert_eq!(marked[4..], [LineType::DocumentStart]);
    }

    #[test]
    fn test_synthetic_token_has_no_source() {
        let marked = DocumentStartMarker::mark(vec![line(LineType::ParagraphLine)]);
        let synthetic = &marked[0];

        assert_eq!(synthetic.line_type, LineType::DocumentStart);
        assert!(synthetic.source_tokens.is_empty());
        assert!(synthetic.token_spans.is_empty());
    }
}