use alloc::vec;
use core::ops::Range;
use logos::{Logos, Span};
use super::{ParsedField, ParsedFieldType};
/// Top-level tokens for lexing a coda document.
///
/// Fix: the line-break patterns were previously written as the character
/// class `[\r\n|\r|\n]`, which — because `|` is literal inside a class —
/// also accepted `|` characters as line breaks. They are now `[\r\n]`,
/// which matches exactly CR and LF.
#[derive(Logos, Debug, PartialEq)]
// Horizontal whitespace: any whitespace character except CR/LF.
#[logos(subpattern space = r"[^\S\r\n]")]
// One or more line-break characters (covers runs of \n, \r and \r\n).
#[logos(subpattern linebreak = r"[\r\n]+")]
#[logos(subpattern to_end_of_line = r"[^\r\n]*")]
#[logos(subpattern coda_id = r"[/:.a-zA-Z0-9_-]+")]
#[logos(subpattern data_id = r"[a-zA-Z0-9_-]+")]
#[logos(subpattern field_id = r"[a-zA-Z0-9_-]+")]
pub enum Token<'a> {
    /// A coda header line: ``# `Some/Coda.Name` Coda``.
    ///
    /// Carries `(full_name, local_name)`, where `local_name` is the last
    /// non-empty segment after splitting the full name on `.`, `:` and `/`.
    #[regex(r"[\r\n]*#(?&space)`(?&coda_id)`(?i)((?&space)coda)?", |lex| {
        let slice = lex.slice();
        let slice = slice.trim(); // trim whitespace
        let slice = &slice[1..]; // trim leading #
        let slice = slice.trim(); // trim whitespace
        // Slice should contain:
        // `The.Coda/Name` Coda
        let split = slice.split_whitespace();
        // Scan for the first slice in the split containing ` characters;
        // this is the slice containing the coda's name.
        let mut name = slice;
        for next in split {
            if next.contains('`') {
                name = next;
                break;
            }
        }
        // Trim leading and trailing grave characters to obtain the full
        // name (the regex guarantees they are present).
        let full_name = &name[1..name.len() - 1];
        // Split on all hierarchy characters; the last non-empty segment
        // is the local name.
        let mut local_name = full_name;
        for next in local_name.split(&['.', ':', '/']) {
            if !next.is_empty() {
                local_name = next;
            }
        }
        (full_name, local_name)
    })]
    Coda((&'a str, &'a str)),
    /// A data header line: ``## `DataName` Data``. Carries the data name.
    #[regex(r"(?&linebreak)##(?&space)`(?&data_id)`(?&space)(?i)(data)", |lex| {
        let slice = lex.slice();
        let slice = slice.trim(); // trim whitespace
        let slice = &slice[2..]; // trim leading ##
        let slice = slice.trim(); // trim whitespace
        // Slice should contain:
        // `DataName` Data
        let mut split = slice.split_whitespace();
        let data_name = split.next().unwrap();
        // Trim leading and trailing grave characters.
        &data_name[1..data_name.len() - 1]
    })]
    Data(&'a str),
    /// A field line: ``+ `field_name` …``; the remainder of the line is
    /// parsed by the [`DataFieldToken`] sub-lexer via `parse_data_field`.
    #[regex(r"(?&linebreak)\+(?&space)`(?&field_id)`(?&space)(?&to_end_of_line)", |lex| {
        parse_data_field(lex.slice())
    })]
    DataField(ParsedField),
    /// Any other line: raw documentation text. Carries the slice, its
    /// source span, and the number of leading space/tab characters
    /// (line-break characters are skipped while counting).
    #[regex(r"(?&linebreak)(?&to_end_of_line)", |lex| {
        let span = lex.span();
        let mut whitespace = 0;
        for c in lex.slice().chars() {
            match c {
                '\n' | '\r' => continue,
                ' ' | '\t' => whitespace += 1,
                _ => break,
            }
        }
        (lex.slice(), span, whitespace)
    })]
    DocsLine((&'a str, Span, usize)),
}
/// Tokens for the sub-lexer run over a single data-field line.
///
/// Fix: the `linebreak` subpattern was the character class
/// `[\r\n|\r|\n]`, which also (incorrectly) matched literal `|`
/// characters; it is now `[\r\n]`.
#[derive(Logos, Debug, PartialEq)]
// Horizontal whitespace: any whitespace character except CR/LF.
#[logos(subpattern space = r"[^\S\r\n]")]
// One or more line-break characters.
#[logos(subpattern linebreak = r"[\r\n]+")]
#[logos(subpattern data_id = r"[a-zA-Z0-9_-]+")]
#[logos(subpattern field_id = r"[a-zA-Z0-9_-]+")]
pub enum DataFieldToken<'a> {
    /// The ``+ `field_name` `` prefix; carries the bare field name.
    #[regex(r"(?&linebreak)\+(?&space)`(?&field_id)`(?&space)", |lex| {
        let slice = lex.slice();
        let slice = slice.trim(); // trim whitespace
        let slice = &slice[1..]; // trim leading +
        let slice = slice.trim(); // trim whitespace
        let slice = &slice[1..]; // trim leading `
        let slice = &slice[..slice.len() - 1]; // trim trailing `
        let slice = slice.trim(); // trim whitespace
        slice
    })]
    FieldName(&'a str),
    /// The `optional` modifier.
    // NOTE(review): requires trailing horizontal whitespace; a modifier
    // at end-of-line will not match — confirm this is intended.
    #[regex(r"(?i)optional(?&space)")]
    Optional,
    /// The `flattened` modifier.
    #[regex(r"(?i)flattened(?&space)")]
    Flattened,
    /// A `list of` marker, optionally prefixed with dimensions,
    /// e.g. `2d list of`. Carries the dimension count (default 1).
    #[regex(r"(?i)([0-9]+d(?&space))?list(?&space)of(?&space)", |lex| {
        let slice = lex.slice();
        let slice = slice.trim();
        // Check if the slice contains list dimensions.
        let mut split = slice.split_whitespace();
        if let Some(next) = split.next() {
            if next.ends_with('d') || next.ends_with('D') {
                let numeric = next.trim_end_matches(['d', 'D']);
                if let Ok(number) = numeric.parse() {
                    return number;
                }
            }
        }
        1
    })]
    List(usize),
    /// A `map of` marker.
    #[regex(r"(?i)map(?&space)of(?&space)")]
    Map,
    /// A type reference, either a markdown link ``[`Name`](target)`` or a
    /// bare identifier, optionally prefixed with `to `. Carries the name.
    #[regex(r"(?i)(to(?&space))?\[`(?&data_id)`\]\([^)]*\)", |lex| {
        let slice = lex.slice();
        // Anchor on the opening "[`" of the markdown link rather than
        // splitting on whitespace: the previous split-based approach
        // panicked when the link target contained spaces. The regex
        // guarantees both markers are present, so the unwraps are safe.
        let start = slice.find("[`").unwrap() + 2;
        let rest = &slice[start..];
        // The name ends at the closing grave.
        let end = rest.find('`').unwrap();
        &rest[..end]
    })]
    #[regex(r"(?i)(to(?&space))?(?&data_id)", |lex| {
        let slice = lex.slice();
        // Strip off any leading `to `.
        let split = slice.split_whitespace();
        split.last().unwrap()
    })]
    FieldType(&'a str),
}
/// Assemble a [`ParsedField`] from a single ``+ `name` …`` field line.
///
/// Runs the [`DataFieldToken`] sub-lexer over `slice`, collecting the
/// field name, the `optional`/`flattened` modifiers, any collection
/// markers (list dimensions, map) and the referenced type names, then
/// folds those into a [`ParsedFieldType`].
fn parse_data_field(slice: &str) -> ParsedField {
    // The whole line doubles as the name until a FieldName token
    // replaces it.
    let mut name = slice;
    let (mut optional, mut flattened, mut is_map) = (false, false, false);
    let mut list_dimensions = 0;
    let mut typing = vec![];

    // Tokens that failed to lex are silently skipped; only successful
    // tokens contribute to the field.
    for token in DataFieldToken::lexer(slice).flatten() {
        match token {
            DataFieldToken::FieldName(n) => name = n,
            DataFieldToken::Optional => optional = true,
            DataFieldToken::Flattened => flattened = true,
            DataFieldToken::List(d) => list_dimensions = d,
            DataFieldToken::Map => is_map = true,
            DataFieldToken::FieldType(t) => typing.push(t.into()),
        }
    }

    // Fold the collected markers into the field's type: maps need
    // exactly two type names (key, then value); scalars and lists need
    // exactly one. The three accepted shapes are mutually exclusive.
    let typing = match (list_dimensions, is_map, typing.len()) {
        (0, true, 2) => {
            let value_typing = typing.pop().unwrap();
            let key_typing = typing.pop().unwrap();
            ParsedFieldType::Map(key_typing, value_typing)
        }
        (0, false, 1) => ParsedFieldType::Scalar(typing.pop().unwrap()),
        (n, false, 1) if n > 0 => ParsedFieldType::List(n, typing.pop().unwrap()),
        (dimensions, is_map, length) => {
            todo!("malformed field: {dimensions:?} - {is_map} - {length}");
        }
    };

    ParsedField {
        name: name.into(),
        // The docs span is filled in later by the caller.
        docs: Range::default(),
        typing,
        optional,
        flattened,
    }
}