use colored::Colorize;
use convert_case::{Case, Casing};
use core::panic;
use lazy_static::lazy_static;
use log::error;
use std::collections::{BTreeMap, HashMap};
use std::error::Error;
use std::path::Path;
use pulldown_cmark::{CowStr, Event, HeadingLevel, OffsetIter, Options, Parser, Tag, TagEnd};
use regex::Regex;
use crate::attribute;
use crate::datamodel::DataModel;
use crate::object::{self, Enumeration, Object};
use crate::option::RawOption;
use crate::validation::Validator;
use super::frontmatter::{parse_frontmatter, FrontMatter, ImportType};
use super::position::{Position, PositionRange};
lazy_static! {
    /// Built-in model definitions, keyed by the type name that refers to them.
    ///
    /// Every value is the JSON text of the definition, embedded at compile time
    /// with `include_str!`.
    static ref MD_MODEL_TYPES: BTreeMap<&'static str, &'static str> = BTreeMap::from([
        (
            "Equation",
            include_str!("../../types/equation/equation-internal.json"),
        ),
        (
            "UnitDefinition",
            include_str!("../../types/unit-definition/unit-definition-internal.json"),
        ),
    ]);
}
/// Start tag of a level-1 heading with no id, classes or attributes.
///
/// Compared with `==` against incoming start tags, so (presumably, given a
/// field-wise `PartialEq` on `Tag` — TODO confirm) only headings that carry no
/// id/classes/attributes match.
const H1: Tag = Tag::Heading {
level: HeadingLevel::H1,
id: None,
classes: Vec::new(),
attrs: Vec::new(),
};
/// Start tag of a level-2 heading with no id, classes or attributes.
const H2: Tag = Tag::Heading {
level: HeadingLevel::H2,
id: None,
classes: Vec::new(),
attrs: Vec::new(),
};
/// Start tag of a level-3 heading with no id, classes or attributes.
const H3: Tag = Tag::Heading {
level: HeadingLevel::H3,
id: None,
classes: Vec::new(),
attrs: Vec::new(),
};
/// End tag that closes a level-3 heading.
const H3_END: TagEnd = TagEnd::Heading(HeadingLevel::H3);
/// Tracks where the object pass currently is relative to level-3 headings.
#[derive(Debug, PartialEq, Eq)]
enum ParserState {
/// The end tag of a level-3 heading has been seen; following text is treated
/// as docstring and lists as attributes.
InDefinition,
/// Initial state, and the state entered again at every level-2 heading;
/// lists and list items are ignored while in it.
OutsideDefinition,
/// Between the start and end tag of a level-3 heading; a `[` text event here
/// introduces mixin names.
InHeading,
}
/// Parses a markdown document into a validated [`DataModel`].
///
/// The content is first cleaned (`clean_content`), then the front matter is
/// read (falling back to its default when it cannot be parsed), objects and
/// enumerations are extracted, imports are merged and the result is validated.
///
/// # Arguments
/// * `content` - Raw markdown text.
/// * `path` - Optional path handed to every import's `fetch`.
///
/// # Errors
/// Returns the [`Validator`] when validation reports the model as invalid.
///
/// # Panics
/// Panics when an import cannot be fetched or a mixin cannot be resolved, as
/// those steps unwrap their results.
#[allow(clippy::result_large_err)]
pub fn parse_markdown(content: &str, path: Option<&Path>) -> Result<DataModel, Validator> {
    let cleaned = clean_content(content);
    let front_matter = parse_frontmatter(&cleaned).unwrap_or_default();
    let newline_offsets = create_line_offsets(&cleaned);

    let mut model = DataModel::new(None, Some(front_matter.clone()));
    let (objects, enums) = parse_model_components(&cleaned, &newline_offsets, &mut model);
    process_model_components(&mut model, objects, enums, &front_matter);
    merge_imports(&mut model, front_matter.imports, path);

    validate_model(&model).map(|()| model)
}
/// Returns the byte offset of every `\n` in `content`, in ascending order.
fn create_line_offsets(content: &str) -> Vec<usize> {
    content
        .match_indices('\n')
        .map(|(offset, _)| offset)
        .collect()
}
/// Runs two passes over `content` and returns the raw objects and enumerations.
///
/// The first pass (heading attributes enabled) feeds every event to
/// `process_object_event`; the second pass (default parser options) feeds every
/// event to `process_enum_event`. Both handlers receive the iterator itself so
/// they can consume follow-up events, which is why `while let` is used instead
/// of a `for` loop.
fn parse_model_components(
    content: &str,
    line_offsets: &[usize],
    model: &mut DataModel,
) -> (Vec<Object>, Vec<Enumeration>) {
    // Pass 1: objects, attributes, docstrings and the model name.
    let mut objects = Vec::new();
    let mut parser_state = ParserState::OutsideDefinition;
    let mut object_events =
        Parser::new_ext(content, Options::ENABLE_HEADING_ATTRIBUTES).into_offset_iter();
    while let Some(next_event) = object_events.next() {
        process_object_event(
            content,
            &mut object_events,
            &mut objects,
            next_event,
            model,
            &mut parser_state,
            line_offsets,
        );
    }

    // Pass 2: enumerations and their fenced mapping blocks.
    let mut enums = Vec::new();
    let mut enum_events = Parser::new(content).into_offset_iter();
    while let Some(next_event) = enum_events.next() {
        process_enum_event(
            content,
            &mut enum_events,
            &mut enums,
            next_event,
            line_offsets,
        );
    }

    (objects, enums)
}
/// Stores the parsed components on `model` and resolves derived information.
///
/// * Enumerations without values are dropped.
/// * With `allow_empty` set, an object is kept unless an enumeration of the
///   same name exists; otherwise only objects that have attributes are kept.
/// * Afterwards enum flags, internal types and mixins are applied.
///
/// # Panics
/// Panics with "Failed to add mixin types" when a mixin cannot be resolved.
fn process_model_components(
    model: &mut DataModel,
    objects: Vec<Object>,
    enums: Vec<Enumeration>,
    config: &FrontMatter,
) {
    model.enums = enums
        .into_iter()
        .filter(|enumeration| enumeration.has_values())
        .collect();

    model.objects = if config.allow_empty {
        objects
            .into_iter()
            .filter(|object| {
                !model
                    .enums
                    .iter()
                    .any(|enumeration| enumeration.name == object.name)
            })
            .collect()
    } else {
        objects
            .into_iter()
            .filter(|object| object.has_attributes())
            .collect()
    };

    set_enum_attributes(model);
    add_internal_types(model);
    add_mixin_types(model).expect("Failed to add mixin types");
}
/// Fetches every import and merges the fetched model into `model`.
///
/// The map keys (import prefixes) are not used.
///
/// # Panics
/// Panics when fetching an import fails.
fn merge_imports(model: &mut DataModel, imports: HashMap<String, ImportType>, path: Option<&Path>) {
    for import in imports.into_values() {
        let imported = import.fetch(path).unwrap();
        model.merge(&imported);
    }
}
/// Runs a fresh [`Validator`] over `model`.
///
/// # Errors
/// Returns the validator itself when it reports the model as invalid.
#[allow(clippy::result_large_err)]
pub(crate) fn validate_model(model: &DataModel) -> Result<(), Validator> {
    let mut validator = Validator::new();
    validator.validate(model);
    if validator.is_valid {
        Ok(())
    } else {
        Err(validator)
    }
}
fn clean_content(content: &str) -> String {
let re = Regex::new(r"<[^>]*>").unwrap();
let content = re.replace_all(content, "").to_string();
let re = Regex::new(r"\[([^]]+)]\([^)]+\)").unwrap();
let content = re.replace_all(content.as_str(), "$1").to_string();
content
}
/// Builds a [`Position`] (line, column range, byte-offset range) for the span
/// `start..end` of `content`.
///
/// `line_offsets` is binary-searched, so it must hold the byte offsets of the
/// `\n` characters of `content` in ascending order.
///
/// # Panics
/// May panic on slicing if the offsets do not fall on character boundaries of
/// `content` or lie outside it.
fn get_position(content: &str, line_offsets: &[usize], start: usize, end: usize) -> Position {
// Either way the index equals the number of newlines strictly before `start`,
// so `+ 1` yields the 1-based line. An offset that is itself a newline is
// attributed to the line that newline terminates.
let line = match line_offsets.binary_search(&start) {
Ok(line) => line + 1,
Err(line) => line + 1,
};
// Offset of the newline that precedes this line (0 for the first line); for
// later lines the slice below therefore begins with that `\n`.
let line_start = if line > 1 { line_offsets[line - 2] } else { 0 };
// Offset of the newline terminating this line, or the end of the content when
// the line is the last one.
let line_end = if line <= line_offsets.len() {
line_offsets[line - 1]
} else {
content.len()
};
let line_content = &content[line_start..line_end];
// Counts leading whitespace characters; on every line but the first this
// includes the preceding `\n` that is part of the slice.
let leading_space = line_content
.chars()
.take_while(|c| c.is_whitespace())
.count();
// Column of `start`, shifted right by the leading whitespace. The `- 1`
// compensates for the newline counted in `leading_space`.
let start_col = if line > 1 {
start - line_offsets[line - 2] + leading_space - 1
} else {
start + 1 + leading_space
};
// End column: distance from the preceding newline to the terminating newline,
// or to `end` on the last line.
let end_col = if line <= line_offsets.len() {
line_offsets[line - 1] - (if line > 1 { line_offsets[line - 2] } else { 0 })
} else {
end - (if line > 1 { line_offsets[line - 2] } else { 0 })
};
Position {
line,
column: PositionRange {
start: start_col,
end: end_col,
},
offset: PositionRange { start, end },
}
}
/// Dispatches a single parser event of the object pass.
///
/// `iterator` is the stream the event came from; several handlers pull further
/// events from it. `state` is updated as headings are entered and left. The
/// order of the match arms matters: the first arm whose pattern and guard match
/// wins.
fn process_object_event(
content: &str,
iterator: &mut pulldown_cmark::OffsetIter,
objects: &mut Vec<Object>,
event: (Event, std::ops::Range<usize>),
model: &mut DataModel,
state: &mut ParserState,
line_offsets: &[usize],
) {
let (event, range) = event;
match event {
// Level-1 heading: its text becomes the model name.
Event::Start(tag) if tag == H1 => {
handle_h1_event(iterator, model);
}
// Level-2 heading: leave any object definition.
Event::Start(tag) if tag == H2 => {
*state = ParserState::OutsideDefinition;
}
// Level-3 heading: start a new object.
Event::Start(tag) if tag == H3 => {
handle_h3_start(content, iterator, objects, state, line_offsets, range);
}
// End of the level-3 heading: what follows belongs to the object body.
Event::End(tag) if tag == H3_END => {
*state = ParserState::InDefinition;
}
// Borrowed text starting with ':' is taken as a type annotation for the last
// attribute. This arm is not gated on `state`.
Event::Text(CowStr::Borrowed(text)) if text.starts_with(":") => {
handle_type_annotation(objects, text);
}
// A lone "[" inside a heading introduces mixin names; elsewhere it is ignored.
Event::Text(CowStr::Borrowed("[")) => {
if *state == ParserState::InHeading {
handle_mixin(objects, iterator);
}
}
// Start of a list without a start number (presumably an unordered list).
Event::Start(Tag::List(None)) => {
if *state == ParserState::OutsideDefinition {
return;
}
handle_list_start(content, iterator, objects, line_offsets, range);
}
// A list item that was not already consumed by one of the handlers above.
Event::Start(Tag::Item) => {
if *state == ParserState::OutsideDefinition {
return;
}
handle_list_item(content, iterator, objects, line_offsets, range);
}
// A lone "]" marks the last attribute as an array. Not gated on `state`.
Event::Text(text) if text.to_string() == "]" => {
handle_array_marker(objects);
}
// Any remaining text inside a definition is appended to the object's docstring.
Event::Text(text) if *state == ParserState::InDefinition => {
handle_docstring(objects, text);
}
_ => {}
}
}
/// Uses the text of the level-1 heading as the model name.
///
/// # Panics
/// Panics when `extract_name` finds no text event.
fn handle_h1_event(iterator: &mut pulldown_cmark::OffsetIter, model: &mut DataModel) {
    let title = extract_name(iterator);
    model.name = Some(title);
}
/// Starts a new object for a level-3 heading.
///
/// Switches `state` to `InHeading`, builds the object from the heading text,
/// records the heading's position and appends the object to `objects`.
fn handle_h3_start(
    content: &str,
    iterator: &mut pulldown_cmark::OffsetIter,
    objects: &mut Vec<Object>,
    state: &mut ParserState,
    line_offsets: &[usize],
    range: std::ops::Range<usize>,
) {
    *state = ParserState::InHeading;

    let mut new_object = process_object_heading(iterator);
    let heading_position = get_position(content, line_offsets, range.start, range.end);
    new_object.set_position(heading_position);

    objects.push(new_object);
}
/// Adds a `type` option to the most recent attribute of the most recent object.
///
/// `text` is expected to start with `:`; leading colons and surrounding
/// whitespace are stripped before the value is stored.
///
/// The caller invokes this for any such text, including text that appears
/// before the first object heading, so a missing object or attribute is
/// silently ignored instead of panicking.
///
/// # Panics
/// Panics when the attribute rejects the option.
fn handle_type_annotation(objects: &mut [Object], text: &str) {
    let last_attribute = objects
        .last_mut()
        .and_then(|object| object.get_last_attribute());
    if let Some(attribute) = last_attribute {
        let dtype = text.trim_start_matches(':').trim().to_string();
        attribute
            .add_option(RawOption::new("type".to_string(), dtype))
            .unwrap();
    }
}
/// Reads the comma-separated mixin names that follow an opening bracket in a
/// heading and stores them on the most recent object.
///
/// # Panics
/// Panics when `objects` is empty, or when the next event is not a text event
/// other than `]` (the error is logged first).
fn handle_mixin(objects: &mut [Object], iterator: &mut pulldown_cmark::OffsetIter) {
    let target = objects.last_mut().unwrap();

    if let Some((Event::Text(names), _)) = iterator.next() {
        if &*names != "]" {
            target.mixins = names
                .split(',')
                .map(|name| name.trim().to_string())
                .collect();
            return;
        }
    }

    error!(
        "[{}] {}: Opening bracket but no mixin name. Mixin wont be applied",
        target.name.bold(),
        "SyntaxError".bold(),
    );
    panic!("Mixin syntax error. Expected mixin name after opening bracket.");
}
/// Handles the start of a list inside an object definition.
///
/// * If the current object already has attributes, the list is read as the
///   options of its last attribute and each entry is distributed accordingly.
/// * Otherwise the list is the attribute list itself: one event is skipped and
///   the first attribute is created from the events that follow.
///
/// # Panics
/// Panics when `objects` is empty or when an option cannot be added.
fn handle_list_start(
    content: &str,
    iterator: &mut pulldown_cmark::OffsetIter,
    objects: &mut [Object],
    line_offsets: &[usize],
    range: std::ops::Range<usize>,
) {
    if objects.last_mut().unwrap().has_attributes() {
        for option in extract_attribute_options(iterator) {
            distribute_attribute_options(objects, option);
        }
        return;
    }

    // Skip the event that directly follows the list start.
    iterator.next();
    let (required, name, shorthand) = extract_attr_name_required(iterator);
    let mut first_attribute = attribute::Attribute::new(name, required);
    if let Some((key, value)) = shorthand {
        first_attribute
            .add_option(RawOption::new(key, value))
            .unwrap();
    }
    first_attribute.set_position(get_position(content, line_offsets, range.start, range.end));
    objects.last_mut().unwrap().add_attribute(first_attribute);
}
/// Creates an attribute from a list item and appends it to the last object.
///
/// A shorthand `name: type` item additionally yields a `type` option.
///
/// # Panics
/// Panics when `objects` is empty, when no attribute name can be extracted or
/// when the option cannot be added.
fn handle_list_item(
    content: &str,
    iterator: &mut pulldown_cmark::OffsetIter,
    objects: &mut [Object],
    line_offsets: &[usize],
    range: std::ops::Range<usize>,
) {
    let (required, name, shorthand) = extract_attr_name_required(iterator);

    let mut new_attribute = attribute::Attribute::new(name, required);
    if let Some((key, value)) = shorthand {
        new_attribute
            .add_option(RawOption::new(key, value))
            .unwrap();
    }
    let item_position = get_position(content, line_offsets, range.start, range.end);
    new_attribute.set_position(item_position);

    objects.last_mut().unwrap().add_attribute(new_attribute);
}
/// Marks the most recent attribute of the most recent object as an array.
///
/// The caller invokes this for every `]` text event regardless of parser
/// state, so a `]` that appears before any object exists (for example in
/// introductory prose) is ignored rather than causing a panic.
fn handle_array_marker(objects: &mut [Object]) {
    let last_attribute = objects
        .last_mut()
        .and_then(|object| object.get_last_attribute());
    if let Some(attribute) = last_attribute {
        attribute.is_array = true;
    }
}
/// Appends `text` to the docstring of the most recent object.
///
/// Fragments are joined with a single space and the whole docstring is then
/// normalised so that every run of whitespace collapses to one space.
///
/// # Panics
/// Panics when `objects` is empty.
fn handle_docstring(objects: &mut [Object], text: CowStr) {
    let target = objects.last_mut().unwrap();
    let fragment: &str = &text;

    if target.docstring.is_empty() {
        target.docstring = fragment.to_string();
    } else {
        target.docstring.push(' ');
        target.docstring.push_str(fragment);
    }

    let normalised = target
        .docstring
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ");
    target.docstring = normalised;
}
/// Builds an object from the heading text that follows in `iterator`.
///
/// A trailing `(term)` in the heading is split off and passed on as the
/// object's term.
fn process_object_heading(iterator: &mut OffsetIter) -> object::Object {
    let raw_heading = extract_name(iterator);
    let (name, term) = extract_object_term(&raw_heading);
    object::Object::new(name, term)
}
/// Returns the first text found within the next three events of `iterator`.
///
/// # Panics
/// Panics when none of the three events is a text event; the message shows the
/// event that follows them (and the stream must not be exhausted at that point).
fn extract_name(iterator: &mut OffsetIter) -> String {
    for _attempt in 0..3 {
        match iterator.next() {
            Some((Event::Text(text), _)) => return text.to_string(),
            _ => continue,
        }
    }
    panic!("Could not extract name: Got {:?}", iterator.next().unwrap());
}
/// Reads an attribute name from the upcoming events.
///
/// Returns `(required, name, shorthand_option)`:
/// * plain text yields an optional attribute; `name: type` text additionally
///   yields the `("type", ...)` option;
/// * strong (bold) text yields a required attribute — the event after the name
///   is consumed as well.
///
/// A leading paragraph start is skipped.
///
/// # Panics
/// Panics when the event is neither text nor the start of strong emphasis.
fn extract_attr_name_required(
    iterator: &mut OffsetIter,
) -> (bool, String, Option<(String, String)>) {
    let first = iterator.next();
    let candidate = if matches!(first, Some((Event::Start(Tag::Paragraph), _))) {
        iterator.next()
    } else {
        first
    };

    match candidate {
        Some((Event::Text(text), _)) => match shorthand_type(&text) {
            Some((key, dtypes)) => (false, key, Some(dtypes)),
            None => (false, text.to_string(), None),
        },
        Some((Event::Start(Tag::Strong), _)) => {
            let name = match iterator.next() {
                Some((Event::Text(text), _)) => text.to_string(),
                _ => String::new(),
            };
            // Consume the event that follows the name.
            iterator.next();
            (true, name, None)
        }
        _ => panic!("Could not extract attribute name. Please check the markdown file."),
    }
}
/// Splits shorthand `name: type` text at the first colon.
///
/// Returns the trimmed name together with a `("type", <trimmed type>)` option,
/// or `None` when `text` contains no colon.
fn shorthand_type(text: &str) -> Option<(String, (String, String))> {
    text.split_once(":").map(|(name, dtype)| {
        (
            name.trim().to_string(),
            ("type".to_string(), dtype.trim().to_string()),
        )
    })
}
/// Splits a heading of the form `Name (term)` into name and term.
///
/// The last `(` and the first `)` after it delimit the term. Without such a
/// pair the whole heading (trailing whitespace removed) is the name.
fn extract_object_term(heading: &str) -> (String, Option<String>) {
    let parentheses = heading.rfind('(').and_then(|open| {
        heading[open..]
            .find(')')
            .map(|relative_close| (open, open + relative_close))
    });

    match parentheses {
        Some((open, close)) => (
            heading[..open].trim_end().to_string(),
            Some(heading[open + 1..close].to_string()),
        ),
        None => (heading.trim_end().to_string(), None),
    }
}
/// Collects the entries of an option list as raw strings, one per list item.
///
/// Events are consumed from `iterator` until the end tag `TagEnd::List(false)`
/// is seen (presumably the end of an unordered list) or the stream ends. Text
/// fragments that follow an item's first text are folded into the last entry
/// depending on what that entry contains.
///
/// # Panics
/// Panics when a text fragment arrives before any list item was started, or
/// when an item yields no text within the events `extract_name` inspects.
fn extract_attribute_options(iterator: &mut OffsetIter) -> Vec<String> {
let mut options = Vec::new();
while let Some((next, _)) = iterator.next() {
match next {
// New list item: its first text becomes a new entry.
Event::Start(Tag::Item) => {
let name = extract_name(iterator);
options.push(name);
}
Event::End(TagEnd::List(false)) => {
break;
}
// "[" is kept literally inside pattern/regex entries; for any other entry it
// is turned into an "[]" suffix.
Event::Text(text) if text.to_string() == "[" => {
let last_option = options.last_mut().unwrap();
let lower = last_option.to_lowercase();
if lower.contains("pattern:") || lower.contains("regex:") {
*last_option = format!("{last_option}[");
} else {
*last_option = format!("{}[]", last_option.trim());
}
}
// "]" is only kept inside pattern/regex entries (elsewhere the "[]" suffix
// was already appended when "[" was seen).
Event::Text(text) if text.to_string() == "]" => {
let last_option = options.last_mut().unwrap();
let lower = last_option.to_lowercase();
if lower.contains("pattern:") || lower.contains("regex:") {
*last_option = format!("{last_option}]");
}
}
// Other text: appended with a separating space to descriptions, appended
// without a space to pattern/regex entries, dropped for everything else.
Event::Text(text) if text.to_string() != "]" => {
let last_option = options.last_mut().unwrap();
let lower = last_option.to_lowercase();
if lower.contains("description:") {
*last_option = format!("{} {}", last_option.trim(), text);
} else if lower.contains("pattern:") || lower.contains("regex:") {
*last_option = format!("{}{}", last_option.trim(), text);
}
}
_ => {}
}
}
options
}
/// Adds the option `key: value` to the last attribute of the last object.
///
/// Does nothing when that object has no attribute yet.
///
/// # Errors
/// Propagates the error returned when the attribute rejects the option.
///
/// # Panics
/// Panics when `objects` is empty.
fn add_option_to_last_attribute(
    objects: &mut [object::Object],
    key: String,
    value: String,
) -> Result<(), Box<dyn Error>> {
    match objects.last_mut().unwrap().get_last_attribute() {
        Some(attribute) => {
            attribute.add_option(RawOption::new(key, value))?;
            Ok(())
        }
        None => Ok(()),
    }
}
/// Interprets one entry of an option list.
///
/// An entry containing `:` is an option for the last attribute; any other entry
/// starts a new, optional attribute named after the entry. Always returns
/// `None`.
///
/// # Panics
/// Panics when `objects` is empty or when the option cannot be added.
fn distribute_attribute_options(objects: &mut [object::Object], attr_string: String) -> Option<()> {
    if !attr_string.contains(':') {
        objects
            .last_mut()
            .unwrap()
            .create_new_attribute(attr_string, false);
    } else {
        let (key, value) = process_option(&attr_string);
        add_option_to_last_attribute(objects, key, value).expect("Failed to add option");
    }
    None
}
/// Splits an option string of the form `key: value` at the first colon.
///
/// Both parts are trimmed; further colons stay part of the value
/// (`"term: schema:name"` yields `("term", "schema:name")`).
///
/// # Panics
/// Panics when `option` contains no colon.
fn process_option(option: &str) -> (String, String) {
    let (key, value) = option
        .split_once(':')
        .unwrap_or_else(|| panic!("Attribute {option} does not have a valid option"));
    (key.trim().to_string(), value.trim().to_string())
}
/// Dispatches a single parser event of the enumeration pass.
///
/// * A level-3 heading starts a new enumeration; its name is the heading text
///   with spaces replaced by underscores, converted to Pascal case.
/// * The text of a fenced code block is parsed as `key = value` mappings of
///   the most recent enumeration (ignored when there is none yet).
///
/// # Panics
/// Panics when the heading text cannot be extracted or when the stream ends
/// right after the start of a fenced code block.
pub fn process_enum_event(
    content: &str,
    iterator: &mut OffsetIter,
    enums: &mut Vec<Enumeration>,
    event: (Event, std::ops::Range<usize>),
    line_offsets: &[usize],
) {
    let (event, range) = event;
    match event {
        Event::Start(tag) if tag == H3 => {
            let heading = extract_name(iterator);
            let mut enumeration = Enumeration {
                name: heading.replace(" ", "_").to_case(Case::Pascal),
                mappings: BTreeMap::new(),
                docstring: "".to_string(),
                position: None,
            };
            let heading_position = get_position(content, line_offsets, range.start, range.end);
            enumeration.set_position(heading_position);
            enums.push(enumeration);
        }
        Event::Start(Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(_))) => {
            if let (Event::Text(body), _) = iterator.next().unwrap() {
                if let Some(current) = enums.last_mut() {
                    process_enum_mappings(current, body.to_string());
                }
            }
        }
        _ => {}
    }
}
/// Parses `key = value` lines and stores them in `enum_obj.mappings`.
///
/// Lines that do not contain exactly one `=` are skipped. Keys and values are
/// trimmed and stripped of double quotes.
fn process_enum_mappings(enum_obj: &mut Enumeration, mappings: String) {
    for line in mappings.split('\n') {
        let mut fields = line.split('=');
        // Exactly two fields means exactly one '=' on the line.
        if let (Some(raw_key), Some(raw_value), None) =
            (fields.next(), fields.next(), fields.next())
        {
            let key = raw_key.trim().replace('"', "");
            let value = raw_value.trim().replace('"', "");
            enum_obj.mappings.insert(key, value);
        }
    }
}
/// Resolves the mixins of every object in `model`.
///
/// Attributes of each referenced mixin are appended to the object. Internal
/// types pulled in through mixins are merged into the model once all objects
/// have been processed.
///
/// # Errors
/// Fails when an object names a mixin that is neither a mixin-free object of
/// the model nor an internal type.
fn add_mixin_types(model: &mut DataModel) -> Result<(), Box<dyn Error>> {
    let available = collect_mixin_objects(model);
    let mut pending_merges = Vec::new();
    let mut seen_internals = Vec::new();

    for object in model.objects.iter_mut() {
        // Cloned so the object can be borrowed mutably while its mixins are walked.
        let requested = object.mixins.clone();
        for mixin_name in requested {
            process_mixins(
                object,
                &mixin_name,
                &available,
                &mut pending_merges,
                &mut seen_internals,
            )?;
        }
    }

    merge_internal_types(model, pending_merges);
    Ok(())
}
/// Returns copies of all objects that declare no mixins themselves; these are
/// the objects other objects may use as mixins.
fn collect_mixin_objects(model: &DataModel) -> Vec<Object> {
    let mut plain_objects = Vec::new();
    for candidate in model.objects.iter() {
        if candidate.mixins.is_empty() {
            plain_objects.push(candidate.clone());
        }
    }
    plain_objects
}
/// Applies the mixin `mixin_name` to `object`.
///
/// Objects of the model take precedence over internal types of the same name.
///
/// # Errors
/// Fails (after logging) when the mixin is neither found in `mixins` nor among
/// the internal types.
fn process_mixins(
    object: &mut Object,
    mixin_name: &str,
    mixins: &[Object],
    to_merge: &mut Vec<DataModel>,
    added_internals: &mut Vec<String>,
) -> Result<(), Box<dyn Error>> {
    match find_mixin_in_objects(mixin_name, mixins) {
        Some(local) => object.attributes.extend(local.attributes.clone()),
        None => match MD_MODEL_TYPES.get(mixin_name) {
            Some(internal_json) => process_internal_mixin_type(
                object,
                mixin_name,
                internal_json,
                to_merge,
                added_internals,
            ),
            None => return report_missing_mixin(object, mixin_name),
        },
    }
    Ok(())
}
/// Returns the first object in `mixins` whose name equals `mixin_name`.
fn find_mixin_in_objects<'a>(mixin_name: &str, mixins: &'a [Object]) -> Option<&'a Object> {
    for candidate in mixins {
        if candidate.name == mixin_name {
            return Some(candidate);
        }
    }
    None
}
/// Applies an internal (built-in) type as a mixin.
///
/// The first object of the internal model donates its attributes to `object`.
/// The remaining objects of the internal model are queued in `to_merge`, but
/// only the first time `mixin_name` is encountered (tracked in
/// `added_internals`).
///
/// The first object is moved out of the parsed model instead of being cloned
/// and then removed, and its attributes are moved rather than cloned.
///
/// # Panics
/// Panics when `internal_type_json` is not a valid `DataModel` or contains no
/// objects.
fn process_internal_mixin_type(
    object: &mut Object,
    mixin_name: &str,
    internal_type_json: &str,
    to_merge: &mut Vec<DataModel>,
    added_internals: &mut Vec<String>,
) {
    let mut internal_type = serde_json::from_str::<DataModel>(internal_type_json)
        .expect("Failed to parse internal data type");

    let mut target_obj = internal_type.objects.remove(0);
    object
        .attributes
        .extend(std::mem::take(&mut target_obj.attributes));

    let already_added = added_internals.iter().any(|name| name == mixin_name);
    if !already_added {
        to_merge.push(internal_type);
        added_internals.push(mixin_name.to_string());
    }
}
/// Logs that `object` references the unknown mixin `mixin_name` and returns
/// the corresponding error. Never returns `Ok`.
fn report_missing_mixin(object: &Object, mixin_name: &str) -> Result<(), Box<dyn Error>> {
    error!(
        "[{}] {}: Mixin {} does not exist.",
        object.name.red().bold(),
        "InheritanceError".bold(),
        mixin_name.red().bold(),
    );
    let failure: Box<dyn Error> = Box::from("Object has a mixin that does not exist");
    Err(failure)
}
/// Merges every queued internal model into `model`, in order.
fn merge_internal_types(model: &mut DataModel, to_merge: Vec<DataModel>) {
    to_merge
        .iter()
        .for_each(|internal| model.merge(internal));
}
/// Merges internal (built-in) types that attributes refer to into `model`.
///
/// An internal type is merged when some attribute lists its name as a data
/// type and the model does not already define an object of that name. Both
/// checks use the state of the model before any merge of this call.
///
/// # Panics
/// Panics when an embedded internal definition is not a valid `DataModel`.
fn add_internal_types(model: &mut DataModel) {
    let referenced_types: Vec<String> = model
        .objects
        .iter()
        .flat_map(|object| object.attributes.iter())
        .flat_map(|attr| attr.dtypes.iter().cloned())
        .collect();
    let defined_names: Vec<String> = model
        .objects
        .iter()
        .map(|object| object.name.clone())
        .collect();

    for (name, content) in MD_MODEL_TYPES.iter() {
        let already_defined = defined_names.iter().any(|known| known.as_str() == *name);
        let is_referenced = referenced_types.iter().any(|dtype| dtype.as_str() == *name);
        if already_defined || !is_referenced {
            continue;
        }
        let internal = serde_json::from_str::<DataModel>(content)
            .expect("Failed to parse internal data type");
        model.merge(&internal);
    }
}
/// Flags attributes whose data types are all enumerations of the model.
///
/// `is_enum` is set to `true` when an attribute has at least one data type and
/// every one of them names an enumeration; it is never reset to `false` here.
fn set_enum_attributes(model: &mut DataModel) {
    let enum_names: Vec<String> = model
        .enums
        .iter()
        .map(|enumeration| enumeration.name.clone())
        .collect();

    let attributes = model
        .objects
        .iter_mut()
        .flat_map(|object| object.attributes.iter_mut());
    for attr in attributes {
        let only_enums = !attr.dtypes.is_empty()
            && attr
                .dtypes
                .iter()
                .all(|dtype| enum_names.contains(dtype));
        if only_enums {
            attr.is_enum = true;
        }
    }
}
/// Classification of an option key; one variant per key name recognised by
/// `OptionKey::from_str`, plus a catch-all.
pub(crate) enum OptionKey {
/// The key `type`.
Type,
/// The key `term`.
Term,
/// The key `description`.
Description,
/// The key `xml`.
Xml,
/// The key `default`.
Default,
/// The key `multiple`.
Multiple,
/// Any key not listed above.
Other,
}
impl OptionKey {
/// Maps a key name to its variant, ignoring case (the key is lowercased
/// before comparison). Unknown keys map to `OptionKey::Other`.
///
/// NOTE(review): this is an inherent method, not an implementation of
/// `std::str::FromStr`, and it cannot fail.
pub fn from_str(key: &str) -> Self {
match key.to_lowercase().as_str() {
"type" => OptionKey::Type,
"term" => OptionKey::Term,
"description" => OptionKey::Description,
"xml" => OptionKey::Xml,
"default" => OptionKey::Default,
"multiple" => OptionKey::Multiple,
_ => OptionKey::Other,
}
}
}