use crate::lex::annotation::analyze_annotation_header_tokens;
use crate::lex::ast::elements::sequence_marker::{DecorationStyle, Form, Separator};
use crate::lex::escape::find_structural_lex_markers;
use crate::lex::token::{LineType, Token};
/// A sequence (ordered-list) marker parsed from the front of a token line.
///
/// All indices are token positions within the slice handed to
/// `parse_seq_marker`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedListMarker {
    /// Index of the first marker token (after leading indentation/whitespace).
    pub marker_start: usize,
    /// Index one past the last marker token (exclusive end of the marker).
    pub marker_end: usize,
    /// Index of the first body token, past the mandatory whitespace that
    /// follows the marker.
    pub body_start: usize,
    /// Decoration style detected from the marker's first segment.
    pub style: DecorationStyle,
    /// Separator that follows the marker (period, parenthesis, ...).
    pub separator: Separator,
    /// Short single-segment marker vs. extended dotted marker (e.g. "1.2.3").
    pub form: Form,
}
/// Classifies a single line of tokens into its `LineType`.
///
/// Checks run in strict priority order: empty/blank lines first, then
/// annotation (data) lines, and finally the marker/colon combinations.
pub fn classify_line_tokens(tokens: &[Token]) -> LineType {
    if tokens.is_empty() {
        return LineType::ParagraphLine;
    }
    if is_blank_line(tokens) {
        return LineType::BlankLine;
    }
    if is_data_marker_line(tokens) {
        return LineType::DataMarkerLine;
    }
    if is_data_line(tokens) {
        return LineType::DataLine;
    }
    // The remaining types are fully determined by two independent facts:
    // does the line start with a sequence marker, and does it end in a colon?
    match (parse_seq_marker(tokens).is_some(), ends_with_colon(tokens)) {
        (true, true) => LineType::SubjectOrListItemLine,
        (true, false) => LineType::ListLine,
        (false, true) => LineType::SubjectLine,
        (false, false) => LineType::ParagraphLine,
    }
}
/// True when the line carries nothing but spacing and line-terminator tokens.
fn is_blank_line(tokens: &[Token]) -> bool {
    for token in tokens {
        match token {
            Token::Whitespace(_) | Token::Indentation | Token::BlankLine(_) => {}
            _ => return false,
        }
    }
    true
}
/// True for a line of the form `MARKER header MARKER ...` where the header
/// between the first two structural lex markers carries a label.
fn is_data_marker_line(tokens: &[Token]) -> bool {
    // Need at least two structural lex markers on the line.
    let structural = find_structural_lex_markers(tokens);
    let (Some(&first), Some(&second)) = (structural.first(), structural.get(1)) else {
        return false;
    };
    // Only indentation/whitespace may precede the opening marker.
    let only_spacing_before = tokens[..first]
        .iter()
        .all(|t| matches!(t, Token::Indentation | Token::Whitespace(_)));
    if !only_spacing_before {
        return false;
    }
    // If anything follows the opening marker, it must start with whitespace.
    if let Some(next) = tokens.get(first + 1) {
        if !matches!(next, Token::Whitespace(_)) {
            return false;
        }
    }
    // The span between the two markers is the header; it must carry a label.
    analyze_annotation_header_tokens(&tokens[first + 1..second]).has_label
}
/// True for a line with exactly one structural lex marker whose trailing
/// header carries a label (an annotation data line without a closing marker).
fn is_data_line(tokens: &[Token]) -> bool {
    // Exactly one structural marker distinguishes a data line from a
    // marker line (two or more) or a plain line (none).
    let structural = find_structural_lex_markers(tokens);
    if structural.len() != 1 {
        return false;
    }
    let marker = structural[0];
    // Only indentation/whitespace may precede the marker.
    if tokens[..marker]
        .iter()
        .any(|t| !matches!(t, Token::Indentation | Token::Whitespace(_)))
    {
        return false;
    }
    // The marker must be immediately followed by whitespace.
    match tokens.get(marker + 1) {
        Some(Token::Whitespace(_)) => {}
        _ => return false,
    }
    // Everything after the marker, minus line terminators, forms the header.
    let header: Vec<Token> = tokens[marker + 1..]
        .iter()
        .filter(|t| !matches!(t, Token::BlankLine(_)))
        .cloned()
        .collect();
    if header.is_empty() {
        return false;
    }
    analyze_annotation_header_tokens(&header).has_label
}
/// Attempts to parse an ordered-list/sequence marker at the start of `tokens`.
///
/// Recognized shapes, after optional leading indentation/whitespace
/// (where `x` is a segment: number, single letter, or roman numeral):
/// - `-`                           -> short, plain style
/// - `(x)`                         -> short, double-paren separator
/// - `x.y.z`, `x.y.z.`, `x.y.z)`   -> extended, dotted segments
/// - `x.`, `x)`                    -> short, period or paren separator
///
/// Returns `None` unless the marker is followed by at least one
/// whitespace token.
pub fn parse_seq_marker(tokens: &[Token]) -> Option<ParsedListMarker> {
    // Skip leading indentation/whitespace; `i` is the marker's start index.
    let mut i = 0;
    while i < tokens.len() && matches!(tokens[i], Token::Indentation | Token::Whitespace(_)) {
        i += 1;
    }
    if i >= tokens.len() {
        return None;
    }
    // Shared tail for every marker shape: the marker only counts if at least
    // one whitespace token follows it; `body_start` is advanced past that run.
    let finish_with_whitespace = |marker_end: usize,
                                  style: DecorationStyle,
                                  separator: Separator,
                                  form: Form|
     -> Option<ParsedListMarker> {
        let mut body_start = marker_end;
        let mut saw_ws = false;
        while body_start < tokens.len() {
            if matches!(tokens[body_start], Token::Whitespace(_)) {
                saw_ws = true;
                body_start += 1;
                continue;
            }
            break;
        }
        if !saw_ws {
            // No whitespace after the would-be marker: not a list marker.
            return None;
        }
        Some(ParsedListMarker {
            marker_start: i,
            marker_end,
            body_start,
            style,
            separator,
            form,
        })
    };
    // Case 1: bare dash marker, e.g. "- item".
    if matches!(tokens[i], Token::Dash) {
        return finish_with_whitespace(
            i + 1,
            DecorationStyle::Plain,
            Separator::Period,
            Form::Short,
        );
    }
    // Case 2: fully parenthesized segment, e.g. "(a) item".
    if i + 2 < tokens.len()
        && matches!(tokens[i], Token::OpenParen)
        && matches!(tokens[i + 2], Token::CloseParen)
        && is_segment(&tokens[i + 1])
    {
        let style = detect_segment_style(&tokens[i + 1]);
        return finish_with_whitespace(i + 3, style, Separator::DoubleParens, Form::Short);
    }
    // Case 3: extended dotted marker, e.g. "1.2.3 item" or "1.a.ii. item".
    if is_segment(&tokens[i]) {
        let mut idx = i + 1;
        let mut segments = 1;
        // Greedily consume ".segment" pairs.
        while idx + 1 < tokens.len()
            && matches!(tokens[idx], Token::Period)
            && is_segment(&tokens[idx + 1])
        {
            segments += 1;
            idx += 2;
        }
        if segments >= 2 {
            // An optional trailing ")" or "." is consumed; when absent, the
            // logical separator defaults to Period.
            let separator = if idx < tokens.len() && matches!(tokens[idx], Token::CloseParen) {
                idx += 1;
                Separator::Parenthesis
            } else if idx < tokens.len() && matches!(tokens[idx], Token::Period) {
                idx += 1;
                Separator::Period
            } else {
                Separator::Period
            };
            // Style comes from the first segment only.
            let style = detect_segment_style(&tokens[i]);
            return finish_with_whitespace(idx, style, separator, Form::Extended);
        }
    }
    // Case 4: short single-segment marker, e.g. "1. item" or "a) item".
    if i + 1 < tokens.len()
        && is_segment(&tokens[i])
        && matches!(tokens[i + 1], Token::Period | Token::CloseParen)
    {
        let style = detect_segment_style(&tokens[i]);
        let separator = if matches!(tokens[i + 1], Token::Period) {
            Separator::Period
        } else {
            Separator::Parenthesis
        };
        return finish_with_whitespace(i + 2, style, separator, Form::Short);
    }
    None
}
/// Convenience predicate: true when the line begins with a parseable
/// sequence marker (see [`parse_seq_marker`]).
pub fn has_seq_marker(tokens: &[Token]) -> bool {
    parse_seq_marker(tokens).is_some()
}
/// True when `s` consists of exactly one alphabetic character.
///
/// Counts characters rather than bytes: the previous `s.len() == 1` check
/// was a byte count and rejected multi-byte single letters (e.g. "é") even
/// though `is_alphabetic` is Unicode-aware.
fn is_single_letter(s: &str) -> bool {
    let mut chars = s.chars();
    // Exactly one char, and that char must be alphabetic.
    matches!((chars.next(), chars.next()), (Some(c), None) if c.is_alphabetic())
}
/// True when the token can serve as one segment of a sequence marker:
/// a number, a single letter, or a roman-numeral run.
fn is_segment(token: &Token) -> bool {
    match token {
        Token::Number(_) => true,
        Token::Text(s) => is_single_letter(s) || is_roman_numeral(s),
        _ => false,
    }
}
/// True when every character of `s` is a roman digit of a single case.
///
/// This is a character-class test only: it does not validate numeral
/// structure ("IIII" passes), and mixed case ("XiV") is rejected.
fn is_roman_numeral(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }
    let all_from = |digits: &str| s.chars().all(|c| digits.contains(c));
    all_from("IVXLCDM") || all_from("ivxlcdm")
}
/// Maps a segment token to its decoration style.
///
/// Roman takes precedence over alphabetical, so a lone "i" reads as roman.
fn detect_segment_style(token: &Token) -> DecorationStyle {
    if let Token::Number(_) = token {
        return DecorationStyle::Numerical;
    }
    if let Token::Text(s) = token {
        if is_roman_numeral(s) {
            return DecorationStyle::Roman;
        }
        if is_single_letter(s) {
            return DecorationStyle::Alphabetical;
        }
    }
    // Non-segment tokens should not reach here; the fallback keeps the
    // function total without panicking.
    DecorationStyle::Numerical
}
/// Reports whether the line's last meaningful token is a colon, ignoring
/// trailing whitespace and line-terminator tokens.
///
/// Rewritten over `iter().rev()`: the previous `tokens.len() as i32 - 1`
/// signed-index loop would silently truncate for slices longer than
/// `i32::MAX` and is non-idiomatic.
pub fn ends_with_colon(tokens: &[Token]) -> bool {
    for token in tokens.iter().rev() {
        match token {
            // Skip trailing spacing and the newline token.
            Token::BlankLine(_) | Token::Whitespace(_) => continue,
            Token::Colon => return true,
            _ => return false,
        }
    }
    false
}
#[cfg(test)]
mod tests {
    use super::*;

    // Plain text with neither a marker nor a trailing colon is a paragraph.
    #[test]
    fn test_classify_paragraph_line() {
        let tokens = vec![
            Token::Text("Hello".to_string()),
            Token::Whitespace(1),
            Token::Text("world".to_string()),
            Token::BlankLine(Some("\n".to_string())),
        ];
        assert_eq!(classify_line_tokens(&tokens), LineType::ParagraphLine);
    }

    // A trailing colon (before the newline) makes a subject line.
    #[test]
    fn test_classify_subject_line() {
        let tokens = vec![
            Token::Text("Title".to_string()),
            Token::Colon,
            Token::BlankLine(Some("\n".to_string())),
        ];
        assert_eq!(classify_line_tokens(&tokens), LineType::SubjectLine);
    }

    // A dash marker followed by whitespace makes a list line.
    #[test]
    fn test_classify_list_line() {
        let tokens = vec![
            Token::Dash,
            Token::Whitespace(1),
            Token::Text("Item".to_string()),
            Token::BlankLine(Some("\n".to_string())),
        ];
        assert_eq!(classify_line_tokens(&tokens), LineType::ListLine);
    }

    // Only spacing/newline tokens: blank line.
    #[test]
    fn test_classify_blank_line() {
        let tokens = vec![
            Token::Whitespace(1),
            Token::BlankLine(Some("\n".to_string())),
        ];
        assert_eq!(classify_line_tokens(&tokens), LineType::BlankLine);
    }

    // Two lex markers with a labeled header between them: data marker line.
    #[test]
    fn test_classify_annotation_start_line() {
        let tokens = vec![
            Token::LexMarker,
            Token::Whitespace(1),
            Token::Text("label".to_string()),
            Token::Whitespace(1),
            Token::LexMarker,
            Token::BlankLine(Some("\n".to_string())),
        ];
        assert_eq!(classify_line_tokens(&tokens), LineType::DataMarkerLine);
    }

    // A single lex marker with a labeled header: data line.
    #[test]
    fn test_classify_data_line() {
        let tokens = vec![
            Token::LexMarker,
            Token::Whitespace(1),
            Token::Text("label".to_string()),
            Token::BlankLine(Some("\n".to_string())),
        ];
        assert_eq!(classify_line_tokens(&tokens), LineType::DataLine);
    }

    // A key=value-only header has no label, so the line is not an
    // annotation line and falls back to paragraph.
    #[test]
    fn test_annotation_line_without_label_falls_back_to_paragraph() {
        let tokens = vec![
            Token::LexMarker,
            Token::Whitespace(1),
            Token::Text("version".to_string()),
            Token::Equals,
            Token::Number("3.11".to_string()),
            Token::Whitespace(1),
            Token::LexMarker,
            Token::BlankLine(Some("\n".to_string())),
        ];
        assert_eq!(classify_line_tokens(&tokens), LineType::ParagraphLine);
    }

    // Both a sequence marker and a trailing colon: ambiguous line type.
    #[test]
    fn test_classify_subject_or_list_item_line() {
        let tokens = vec![
            Token::Dash,
            Token::Whitespace(1),
            Token::Text("Item".to_string()),
            Token::Colon,
            Token::BlankLine(Some("\n".to_string())),
        ];
        assert_eq!(
            classify_line_tokens(&tokens),
            LineType::SubjectOrListItemLine
        );
    }

    // Short markers: "1.", "a.", "I.", and "1)" are all recognized.
    #[test]
    fn test_ordered_seq_markers() {
        let tokens = vec![
            Token::Number("1".to_string()),
            Token::Period,
            Token::Whitespace(1),
            Token::Text("Item".to_string()),
        ];
        assert!(has_seq_marker(&tokens));
        let tokens = vec![
            Token::Text("a".to_string()),
            Token::Period,
            Token::Whitespace(1),
            Token::Text("Item".to_string()),
        ];
        assert!(has_seq_marker(&tokens));
        let tokens = vec![
            Token::Text("I".to_string()),
            Token::Period,
            Token::Whitespace(1),
            Token::Text("Item".to_string()),
        ];
        assert!(has_seq_marker(&tokens));
        let tokens = vec![
            Token::Number("1".to_string()),
            Token::CloseParen,
            Token::Whitespace(1),
            Token::Text("Item".to_string()),
        ];
        assert!(has_seq_marker(&tokens));
    }

    // Extended dotted marker "4.3.2" without a trailing separator:
    // marker spans tokens 0..5, body begins after the whitespace at 6.
    #[test]
    fn test_extended_ordered_seq_marker() {
        let tokens = vec![
            Token::Number("4".to_string()),
            Token::Period,
            Token::Number("3".to_string()),
            Token::Period,
            Token::Number("2".to_string()),
            Token::Whitespace(1),
            Token::Text("Item".to_string()),
        ];
        let parsed = parse_seq_marker(&tokens).expect("expected list marker");
        assert_eq!(parsed.marker_start, 0);
        assert_eq!(parsed.marker_end, 5);
        assert_eq!(parsed.body_start, 6);
        assert_eq!(classify_line_tokens(&tokens), LineType::ListLine);
    }

    // Mixed segment kinds in an extended marker ("1.a.ii."); style is
    // taken from the first segment (numerical here).
    #[test]
    fn test_extended_marker_with_lowercase_roman() {
        let tokens = vec![
            Token::Number("1".to_string()),
            Token::Period,
            Token::Text("a".to_string()),
            Token::Period,
            Token::Text("ii".to_string()),
            Token::Period,
            Token::Whitespace(1),
            Token::Text("Item".to_string()),
        ];
        let parsed = parse_seq_marker(&tokens).expect("expected extended marker");
        assert_eq!(parsed.form, Form::Extended);
        assert_eq!(parsed.style, DecorationStyle::Numerical);
        assert_eq!(parsed.separator, Separator::Period);
        assert_eq!(classify_line_tokens(&tokens), LineType::ListLine);
    }

    // "ii." is a short marker with roman style (roman beats alphabetical).
    #[test]
    fn test_lowercase_roman_short_marker() {
        let tokens = vec![
            Token::Text("ii".to_string()),
            Token::Period,
            Token::Whitespace(1),
            Token::Text("Item".to_string()),
        ];
        let parsed = parse_seq_marker(&tokens).expect("expected short roman marker");
        assert_eq!(parsed.form, Form::Short);
        assert_eq!(parsed.style, DecorationStyle::Roman);
        assert_eq!(classify_line_tokens(&tokens), LineType::ListLine);
    }

    // A lex marker inside a quoted value is not structural, so this line
    // still has two structural markers and classifies as a marker line.
    #[test]
    fn test_lex_marker_inside_quoted_value_is_annotation_start() {
        let tokens = vec![
            Token::LexMarker,
            Token::Whitespace(1),
            Token::Text("note".to_string()),
            Token::Whitespace(1),
            Token::Text("foo".to_string()),
            Token::Equals,
            Token::Quote,
            Token::LexMarker, Token::Whitespace(1),
            Token::Text("jane".to_string()),
            Token::Quote,
            Token::Whitespace(1),
            Token::LexMarker, Token::BlankLine(Some("\n".to_string())),
        ];
        assert_eq!(classify_line_tokens(&tokens), LineType::DataMarkerLine);
    }

    // Same quoting rule, but only one structural marker remains: data line.
    #[test]
    fn test_lex_marker_inside_quoted_value_data_line() {
        let tokens = vec![
            Token::LexMarker,
            Token::Whitespace(1),
            Token::Text("note".to_string()),
            Token::Whitespace(1),
            Token::Text("foo".to_string()),
            Token::Equals,
            Token::Quote,
            Token::LexMarker, Token::Whitespace(1),
            Token::Text("value".to_string()),
            Token::Quote,
            Token::BlankLine(Some("\n".to_string())),
        ];
        assert_eq!(classify_line_tokens(&tokens), LineType::DataLine);
    }
}