#![allow(non_snake_case)]
use indextree::{Arena, NodeId};
use regex::Regex;
use fancy_regex::Regex as Reg;
use lazy_static::lazy_static;
/// ANSI SGR escape sequences used to colorize terminal output.
///
/// Each constant is the ESC byte (`\x1b`) followed by a `[...m` parameter list.
#[allow(dead_code)] // not every color is referenced by the printing code
mod text_format {
    // Rust has no octal escapes: "\033" is NUL followed by the text "33",
    // so the ESC byte must be spelled "\x1b".
    pub const ENDC: &str = "\x1b[0m";
    pub const HEADER: &str = "\x1b[95m";
    pub const BLUE: &str = "\x1b[94m";
    pub const GREEN: &str = "\x1b[92m";
    pub const YELLOW: &str = "\x1b[93m";
    pub const RED: &str = "\x1b[91m";
    pub const CYAN: &str = "\x1b[96m";
    pub const MAGENTA: &str = "\x1b[95m";
    pub const WHITE: &str = "\x1b[97m";
    pub const BLACK: &str = "\x1b[90m";
    pub const BRIGHT_BLUE: &str = "\x1b[94;1m";
    pub const BRIGHT_GREEN: &str = "\x1b[92;1m";
    pub const BRIGHT_YELLOW: &str = "\x1b[93;1m";
    pub const BRIGHT_RED: &str = "\x1b[91;1m";
    pub const BRIGHT_CYAN: &str = "\x1b[96;1m";
    pub const BRIGHT_MAGENTA: &str = "\x1b[95;1m";
    pub const BRIGHT_WHITE: &str = "\x1b[97;1m";
    pub const BRIGHT_BLACK: &str = "\x1b[90;1m";
    pub const BOLD: &str = "\x1b[1m";
    pub const UNDERLINE: &str = "\x1b[4m";
}
// Regular expressions shared by the extraction functions. Each one is compiled on
// first use; a pattern that fails to compile panics with the message given to `expect`.
// `Regex` is the `regex` crate type, `Reg` is `fancy_regex::Regex` (needed for look-around).
lazy_static! {
// `end [name];` at the start of a line; group 2 is the optional (possibly dotted) name.
static ref RE_END_STATEMENT: Regex = Regex::new(r#"(?im)^\s*end(\s+([\w\.]+))?\s*;"#).expect("Failed to compile RE_END_STATEMENT");
// First `is` or `;` (case-sensitive); used after a package header.
static ref RE_END_SEARCH: Regex = Regex::new(r"\s*(is|;)").expect("Failed to compile RE_END_SEARCH");
// Same alternatives with the multi-line flag; used after a procedure/function header.
static ref RE_END_IS_OR_SEMI: Regex = Regex::new(r"(?m)\s*(is|;)").expect("Failed to compile RE_END_IS_OR_SEMI");
// A single semicolon.
static ref RE_SEMICOLON: Regex = Regex::new(r";").expect("Failed to compile RE_SEMICOLON");
// Package header: optional `generic`/`separate`, `package`, optional `body`, then the name.
// The negative look-ahead skips lines that begin with a `--` comment.
static ref RE_PACKAGE: Reg = Reg::new(r#"(?im)^(?!\s*--)\s*(?P<type>\b(?:generic|separate)\b)?\s*(?P<category>\bpackage\b)(?:\s+(?P<body>\bbody\b))?\s+(?P<name>\S+)"#).expect("Failed to compile RE_PACKAGE");
// `procedure`/`function` header: name, optional parenthesised parameters, optional `return` type.
static ref RE_FUNC_PROC: Reg = Reg::new(r"(?im)^(?!\s*--)(?:^[^\S\n]*|(?<=\n))(?P<category>\bprocedure|function\b)\s+(?P<name>[^\s\(\;]*)(?:\s*\((?P<params>[\s\S]*?(?=\)))\))?(?:\s*return\s*(?P<return_statement>[\w\.\_\-]+))?").expect("Failed to compile RE_FUNC_PROC");
// `type`/`subtype` declaration followed by `is new <base>`, a parenthesised list, or a kind
// word with an optional `range` part.
static ref RE_TYPE_SUBTYPE: Reg = Reg::new(r#"(?ims)^(?!\s*--)(?:^[^\S\n]*|(?<=\n))(?P<category>\btype\b|\bsubtype\b)\s+(?P<name>.*?)(?:\s+is(?:\s+new\s+(?P<base_type>[\w\._\-]+)|(?:\s*(?P<tuple_type>\([^)]+\))|\s*(?P<type_kind>[\w\._\-]+(?:\s*range\s*.*?)?))))"#).expect("Failed to compile RE_TYPE_SUBTYPE");
// `for <name> use record`.
static ref RE_REPR_CLAUSE: Reg = Reg::new(r#"(?im)^(?!\s*--)(?:^[^\S\n]*|(?<=\n))(?P<category>\bfor\b)\s+(?P<name>\w+)\s+use\s+(?P<type_kind>\brecord\b)"#).expect("Failed to compile RE_REPR_CLAUSE");
// `for <name> use at <address>;`.
static ref RE_REPR_AT: Reg = Reg::new(r#"(?ims)^(?!\s*--)(?:^[^\S\n]*|(?<=\n))(?P<category>\bfor\b)\s+(?P<name>\S+)\s+use\s+at\s+(?P<address>.*?);"#).expect("Failed to compile RE_REPR_AT");
// `declare` as the first word of a non-comment line.
static ref RE_DECLARE: Reg = Reg::new(r#"(?im)^(?!\s*--)\s*(?P<declare>\bdeclare\b)"#).expect("Failed to compile RE_DECLARE");
// `loop` as the first word of a line.
static ref RE_SIMPLE_LOOP: Regex = Regex::new(r"(?im)^\s*(?P<Captureloop>\bloop\b)").expect("Failed to compile RE_SIMPLE_LOOP");
// `exit when <condition>`; the condition runs up to the next `;`.
static ref RE_EXIT_WHEN: Regex = Regex::new(r"(?im)^\s*exit\s+when\s*(?P<exitcond1>[^;]*)").expect("Failed to compile RE_EXIT_WHEN");
// `while <condition> loop`, plus the rest of the line after `loop`.
static ref RE_WHILE_LOOP: Regex = Regex::new(r"(?ims)(?P<Capturewhile>\bwhile\b)\s*(?P<exitcond2>.*?)\s*\bloop\b[^\n;]*").expect("Failed to compile RE_WHILE_LOOP");
// `for <index> in ... loop` in three shapes: `<type> range a .. b`, `a .. b`, or a bare name.
static ref RE_FOR_LOOP: Reg = Reg::new(r#"(?i)(?P<Capturefor>\bfor\b)\s*(?P<index>.*?)\s*\bin\b\s*(?:(?P<loop_direction>.*?))?\s*(?P<primavar>[^\s]*)\s*(?:(?=\brange\b)\brange\b\s*(?P<frst>(?:.|\n)*?)\s+\.\.\s*(?P<scnd>(?:.|\n)*?)\s+\bloop\b|(?:(?=\.\.)\.\.\s*(?P<range_end>(?:.|\n)*?)\s*\bloop\b|\s*\bloop\b))"#).expect("Failed to compile RE_FOR_LOOP");
// `if <condition> then`; the look-behind rejects a `then` that directly follows `and `.
static ref RE_IF: Reg = Reg::new(r"(?ims)^\s*(?P<ifstat>\bif\b)(?P<Condition>.*?)(?<!\band\b\s)then").expect("Failed to compile RE_IF");
// `elsif <condition> then`, with the same look-behind as RE_IF.
static ref RE_ELSIF: Reg = Reg::new(r#"(?ims)^\s*(?P<elsifstat>\belsif\b)(?P<Condition>.*?)(?<!\band\b\s)then"#).expect("Failed to compile RE_ELSIF");
// `else` as the first word of a line.
static ref RE_ELSE: Reg = Reg::new(r#"(?ims)^\s*(?P<elsestat>\belse\b)"#).expect("Failed to compile RE_ELSE");
// `case <var> is`, unless the `case` is directly preceded by `end `.
static ref RE_CASE: Reg = Reg::new(r#"(?ims)(?<!end\s)(?P<Casestmnt>\bcase\b)\s*(?P<var>.*?)\s*\bis\b"#).expect("Failed to compile RE_CASE");
// `when <choice> =>`, optionally followed immediately by a nested `case ... is`.
static ref RE_WHEN: Reg = Reg::new(r#"(?ims)^(?P<spaces> *)\bwhen\b\s*(?P<caso>.*?)\s*=>\s*(?:(?=\bcase\b)(?P<doublecase>\bcase\b)[\s\S]*?\bis\b|)"#).expect("Failed to compile RE_WHEN");
// Task header: `task`, optional `body`, then the name.
static ref RE_TASK: Reg = Reg::new(r#"(?im)^(?!\s*--)\s*(?P<category>\btask\b)(?:\s+(?P<body>\bbody\b))?\s+(?P<name>\S+)"#).expect("Failed to compile RE_TASK");
// `entry` header: name, optional parameters, optional `when <condition>`, up to `is` or `;`.
static ref RE_ENTRY: Reg = Reg::new(r"(?ims)^(?!\s*--)(?:^[^\S\n]*|(?<=\n))(?P<category>\bentry\b)\s+(?P<name>[^\s\(\;]*)(?:\s*\((?P<params>[\s\S]*?(?=\)))\))?(?:\s+when\s+(?P<condition>.*?))?(?=\s*(?:is|;))").expect("Failed to compile RE_ENTRY");
// Single-line declaration `a, b : Type [:= default];`.
static ref RE_VAR_DECL: Reg = Reg::new(r#"(?m)^[^\S\n]*(?P<names_list>[A-Za-z_][A-Za-z0-9_]*(?:[^\S\n]*,[^\S\n]*[A-Za-z_][A-Za-z0-9_]*)*)[^\S\n]*:[^\S\n]*(?P<data_type>[A-Za-z_][^\n:;]*?)(?:[^\S\n]*:=[^\S\n]*(?P<default_value>[^\n;]+?))?[^\S\n]*;"#).expect("Failed to compile RE_VAR_DECL");
// Either a double-quoted string literal or a `--` comment running to the end of the line.
static ref RE_CLEAN_CODE: Regex = Regex::new(r#"(?m)(?P<string>\"[^\"]*\")|(?P<comment>--.*$)"#).expect("Failed to compile RE_CLEAN_CODE");
}
/// Failures reported while extracting nodes from source text or assembling the tree.
#[derive(Debug)]
pub enum ASTError {
    /// A regex match object that was required could not be obtained.
    MatchItemMissing,
    /// A required capture group did not participate in the match.
    InvalidCapture,
    /// The tree could not be built.
    TreeBuildError,
    /// The text contained no match.
    NoMatchFound,
    /// A node id was expected but absent; the payload identifies it.
    NodeIdMissing(String),
    /// The opening node of a block was not found while associating end lines.
    StartNodeNotFound,
    /// A node id did not resolve to a node in the arena; the payload identifies it.
    NodeNotInArena(String),
    /// The regex engine reported an error while matching.
    RegexError,
}

impl std::error::Error for ASTError {}

impl std::fmt::Display for ASTError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Variants carrying a payload are formatted right away; all the others
        // resolve to a fixed message written once at the end.
        let message = match self {
            ASTError::NodeIdMissing(node_id) => {
                return write!(f, "NodeId {} is missing", node_id);
            }
            ASTError::NodeNotInArena(node_id) => {
                return write!(f, "NodeId {} not found in arena", node_id);
            }
            ASTError::MatchItemMissing => "Required match item was missing",
            ASTError::InvalidCapture => "Required capture group was missing",
            ASTError::TreeBuildError => "Failed to build AST",
            ASTError::NoMatchFound => "No match found in the provided text",
            ASTError::StartNodeNotFound => {
                "Start node not found in arena during end line association"
            }
            ASTError::RegexError => "Regex error occurred",
        };
        f.write_str(message)
    }
}
/// Operator kinds carried by a `UnaryExpression`.
#[derive(Debug, Clone, PartialEq)]
pub enum Unaries {
/// The `not` operator (presumably logical negation; confirm against the condition parser).
NOT,
}
/// Operator kinds carried by a `MembershipExpression`.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, PartialEq)]
pub enum Memberships {
/// Presumably the `not in` membership test.
NOT_IN,
/// Presumably the `in` membership test.
IN,
}
/// Operator kinds carried by a `BinaryExpression`.
///
/// NOTE(review): the textual operator each variant stands for is decided by the condition
/// parser, which is not part of this section; the names suggest logical and relational operators.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, PartialEq)]
pub enum Binaries {
AND,
OR,
AND_THEN,
OR_ELSE,
XOR,
// Presumably `<`.
INFERIOR,
// Presumably `>`.
SUPERIOR,
// Presumably `<=`.
INFERIOR_OR_EQUAL,
// Presumably `>=`.
SUPERIOR_OR_EQUAL,
EQUAL,
UNEQUAL,
}
/// A node of a parsed condition: an operator applied to sub-expressions, a nested
/// condition, or a leaf holding raw text.
#[derive(Debug, Clone)]
pub enum Expression {
/// One operand with a unary operator.
Unary(UnaryExpression),
/// Two operands joined by a binary operator.
Binary(BinaryExpression),
/// Two operands joined by a membership operator.
Membership(MembershipExpression),
/// A nested condition.
Condition(ConditionExpr),
/// Leaf text.
Literal(String),
}
/// A unary operator applied to a single operand.
#[derive(Debug, Clone)]
pub struct UnaryExpression {
/// The operator.
pub op: Unaries,
/// The expression the operator applies to.
pub operand: Box<Expression>,
/// Presumably the source text of this sub-condition; confirm against the condition parser.
pub condstring: String,
}
/// A binary operator applied to a left and a right operand.
#[derive(Debug, Clone)]
pub struct BinaryExpression {
/// The operator.
pub op: Binaries,
/// Left-hand operand.
pub left: Box<Expression>,
/// Right-hand operand.
pub right: Box<Expression>,
/// Presumably the source text of this sub-condition; confirm against the condition parser.
pub condstring: String,
}
/// A membership operator applied to a left and a right operand.
#[derive(Debug, Clone)]
pub struct MembershipExpression {
/// The operator.
pub op: Memberships,
/// Left-hand operand.
pub left: Box<Expression>,
/// Right-hand operand.
pub right: Box<Expression>,
/// Presumably the source text of this sub-condition; confirm against the condition parser.
pub condstring: String,
}
/// A parsed condition as stored on a node.
#[derive(Debug, Clone)]
pub struct ConditionExpr {
/// The expressions making up the condition; `NodeData::print_info` prints one entry per element.
pub list: Vec<Expression>,
/// "albero" is Italian for "tree": presumably the root of the expression tree, when one was built.
pub albero: Option<Box<Expression>>,
}
/// One formal parameter of a procedure or function.
#[derive(Debug, Clone)]
pub struct ArgumentData {
/// Parameter name.
pub name: String,
/// Parameter mode as text (presumably `in`, `out` or `in out`; confirm against the parameter parser).
pub mode: String,
/// Parameter type as text.
pub data_type: String,
/// Default value text, when one is present.
pub default_value: Option<String>,
}
/// Return information attached to a node.
#[derive(Debug, Clone)]
pub struct ReturnKeywordData {
/// Returned type as text, when known.
pub data_type: Option<String>,
}
/// One `end ...;` statement found by `AST::extract_end_statements`.
#[derive(Debug, Clone)]
pub struct EndStatement {
/// The word following `end` (a block keyword or a name); empty for a bare `end;`.
pub word: String,
/// Byte offset where the match starts.
pub index: usize,
/// 1-based line number of the match start.
pub line: usize,
/// Byte offset just past the terminating `;`.
pub end_index: usize,
}
/// Words that may follow `end` and close a block by kind rather than by name.
const UNNAMED_END_KEYWORDS: [&str; 4] = ["loop", "if", "case", "record"];
/// An event on the source timeline used to pair block openers with `end` statements.
enum ParseEvent {
/// A node that still lacks an end line, positioned at its start offset.
StartNodeId { node_id: NodeId, index: usize },
/// An `end` statement; the fields mirror `EndStatement`.
End { word: String, index: usize, line: usize, end_index: usize },
}
/// Everything recorded about one extracted construct. Only `name` and `node_type` are
/// always meaningful; the optional fields are filled in by whichever extractor applies.
#[derive(Debug,Clone)]
pub struct NodeData {
/// Identifier of the construct, or a fixed label such as "SimpleLoop" for unnamed ones.
pub name: String,
/// Kind tag such as "PackageNode" or "ForLoop"; `AST::get_end_keyword` dispatches on it.
pub node_type: String,
/// 1-based line where the construct starts.
pub start_line: Option<usize>,
/// 1-based line where the construct ends. Parent packages synthesized for dotted names
/// carry `usize::MAX` as a placeholder.
pub end_line: Option<usize>,
/// Byte offset where the construct starts.
pub start_index: Option<usize>,
/// Byte offset just past the end of the construct.
pub end_index: Option<usize>,
/// Byte column of the start within its line (set for type declarations).
pub column: Option<usize>,
/// Byte offset just past the loop header match (set for simple and while loops).
pub body_start: Option<usize>,
/// Enclosing package derived from a dotted name, if any.
pub parent : Option<Box<NodeData>>,
/// Whether the construct is a body rather than a specification.
pub is_body: Option<bool>,
pub pkg_name: Option<String>,
/// Keyword category, e.g. "type", "subtype" or "for" on type declarations.
pub category_type: Option<String>,
/// Formal parameters of a procedure or function.
pub arguments: Option<Vec<ArgumentData>>,
pub return_type: Option<ReturnKeywordData>,
/// Kind of a type declaration, e.g. "record", "tuple", "derived" or "at_clause".
pub type_kind: Option<String>,
/// Base type of a derived type; for an `at` clause this holds the address text.
pub base_type: Option<String>,
/// Comma-separated values of a parenthesised type definition.
pub tuple_values: Option<Vec<String>>,
/// Parsed loop or branch condition.
pub conditions: Option<ConditionExpr>,
/// Loop index name of a `for` loop.
pub iterator: Option<String>,
pub range_start: Option<String>,
pub range_end: Option<String>,
pub direction: Option<String>,
pub range_var: Option<String>,
pub iterator_type: Option<String>,
pub switch_expression: Option<String>,
pub cases: Option<Vec<String>>,
}
impl NodeData {
    /// Creates a node with the given identity and start position.
    ///
    /// `is_body` is stored as `Some(is_body)`; every other optional attribute starts as `None`.
    pub fn new(
        name: String,
        node_type: String,
        start_line: Option<usize>,
        start_index: Option<usize>,
        is_body: bool,
    ) -> Self {
        NodeData {
            name,
            node_type,
            start_line,
            end_line: None,
            start_index,
            end_index: None,
            column: None,
            body_start: None,
            parent: None,
            is_body: Some(is_body),
            pkg_name: None,
            category_type: None,
            arguments: None,
            return_type: None,
            type_kind: None,
            base_type: None,
            tuple_values: None,
            conditions: None,
            iterator: None,
            range_start: None,
            range_end: None,
            direction: None,
            range_var: None,
            iterator_type: None,
            switch_expression: None,
            cases: None,
        }
    }

    /// Prints one `label: value, ` line to stdout with the label rendered in `color`.
    fn print_field(color: &str, label: &str, value: &dyn std::fmt::Display) {
        println!("{} {}: {} {}, ", color, label, text_format::ENDC, value);
    }

    /// Prints every attribute that is set on this node to stdout, one per line,
    /// with colored labels.
    pub fn print_info(&self) {
        Self::print_field(text_format::BLUE, "Category", &self.node_type);
        Self::print_field(text_format::BLUE, "Name", &self.name);
        if let Some(start_line) = self.start_line {
            Self::print_field(text_format::BLUE, "Start Line", &start_line);
        }
        if let Some(end_line) = self.end_line {
            Self::print_field(text_format::BLUE, "End Line", &end_line);
        }
        if let Some(pkg_name) = &self.pkg_name {
            Self::print_field(text_format::BLUE, "Package Name", pkg_name);
        }
        if let Some(category_type) = &self.category_type {
            Self::print_field(text_format::BLUE, "Category Type", category_type);
        }
        if let Some(is_body) = self.is_body {
            let kind = if is_body { "Body" } else { "Spec" };
            Self::print_field(text_format::BLUE, "Is Body", &kind);
        }
        if let Some(arguments) = &self.arguments {
            println!("{} Arguments: {}", text_format::GREEN, text_format::ENDC);
            for arg in arguments {
                Self::print_field(text_format::YELLOW, "Name", &arg.name);
                Self::print_field(text_format::YELLOW, "Mode", &arg.mode);
                Self::print_field(text_format::YELLOW, "Data Type", &arg.data_type);
                if let Some(default_value) = &arg.default_value {
                    Self::print_field(text_format::YELLOW, "Default Value", default_value);
                }
                println!(" ------");
            }
        }
        if let Some(data_type) = self.return_type.as_ref().and_then(|r| r.data_type.as_ref()) {
            Self::print_field(text_format::RED, "Return Type", data_type);
        }
        if let Some(type_kind) = &self.type_kind {
            Self::print_field(text_format::BLUE, "Type Kind", type_kind);
        }
        if let Some(base_type) = &self.base_type {
            Self::print_field(text_format::RED, "Base Type", base_type);
        }
        if let Some(tuple_values) = &self.tuple_values {
            println!("{} Tuple Values: {} {:?}", text_format::RED, text_format::ENDC, tuple_values);
        }
        if let Some(conditions) = &self.conditions {
            println!("{} Conditions: {}", text_format::GREEN, text_format::ENDC);
            // Only the expression parts use `{:?}`; the color reset and the separator are
            // written verbatim so the terminal actually receives the escape sequence.
            for cond_expr in &conditions.list {
                match cond_expr {
                    Expression::Unary(unary) => println!(
                        "{} Unary Cond: {} {:?} {:?}, ",
                        text_format::YELLOW,
                        text_format::ENDC,
                        unary.op,
                        unary.operand
                    ),
                    Expression::Binary(binary) => println!(
                        "{} Binary Cond: {} {:?} {:?} {:?}, ",
                        text_format::YELLOW,
                        text_format::ENDC,
                        binary.left,
                        binary.op,
                        binary.right
                    ),
                    Expression::Membership(membership) => println!(
                        "{} Membership Cond: {} {:?} {:?} {:?}, ",
                        text_format::YELLOW,
                        text_format::ENDC,
                        membership.left,
                        membership.op,
                        membership.right
                    ),
                    Expression::Literal(literal) => println!(
                        "{} Literal Cond: {} {:?}, ",
                        text_format::YELLOW,
                        text_format::ENDC,
                        literal
                    ),
                    Expression::Condition(nested) => println!(
                        "{} Nested Condition Expr: {} {:?}",
                        text_format::YELLOW,
                        text_format::ENDC,
                        nested
                    ),
                }
                println!(" ------");
            }
        }
        if let Some(iterator) = &self.iterator {
            Self::print_field(text_format::CYAN, "Iterator", iterator);
        }
        if let Some(iterator_type) = &self.iterator_type {
            Self::print_field(text_format::YELLOW, "Iterator Type", iterator_type);
        }
        if let Some(range_var) = &self.range_var {
            Self::print_field(text_format::WHITE, "Range Var", range_var);
        }
        if let Some(range_start) = &self.range_start {
            Self::print_field(text_format::WHITE, "Range Start", range_start);
        }
        if let Some(range_end) = &self.range_end {
            Self::print_field(text_format::GREEN, "Range End", range_end);
        }
        if let Some(direction) = &self.direction {
            Self::print_field(text_format::RED, "Direction", direction);
        }
        if let Some(switch_expression) = &self.switch_expression {
            Self::print_field(text_format::BRIGHT_BLUE, "Switch Expression", switch_expression);
        }
        if let Some(cases) = &self.cases {
            println!("{} Cases: {}", text_format::RED, text_format::ENDC);
            for case in cases {
                Self::print_field(text_format::RED, "Case", case);
                println!(" ------");
            }
        }
    }
}
/// The tree of extracted constructs together with the flat node list it is built from.
pub struct AST {
/// Storage for all tree nodes.
pub arena: Arena<NodeData>,
/// Id of the synthetic "root" node that every top-level construct hangs under.
pub root_id: NodeId,
/// Flat list of extracted nodes; `build` sorts it by start offset and copies it into the arena.
pub nodes_data: Vec<NodeData>,
/// One slot per entry of `nodes_data`, read by `print_nodes_info`; `new` fills it with `None`.
pub node_ids: Vec<Option<NodeId>>,
}
impl AST {
pub fn new(nodes_data: Vec<NodeData>) -> Self {
let mut arena = Arena::new();
let root_id = arena.new_node(NodeData::new("root".to_string(), "RootNode".to_string(), None, None, false)); let node_ids = vec![None; nodes_data.len()];
AST {
arena,
root_id,
nodes_data,
node_ids,
}
}
/// Returns the id of the synthetic root node.
pub fn root_id(&self) -> NodeId {
self.root_id
}
/// Borrows the arena that stores the tree nodes.
pub fn arena(&self) -> &Arena<NodeData> {
&self.arena
}
/// Pairs every node that still lacks an end line with the `end ...;` statement that closes it.
///
/// Block openers and `end` statements are merged into one list ordered by byte offset and
/// replayed against a stack of open blocks. Each `end` closes the innermost open block that
/// it is compatible with:
/// - a bare `end;` closes a block whose end keyword is `""` or `"name"`;
/// - `end loop|if|case|record;` closes a block with that end keyword;
/// - `end <name>;` closes a named block with the same name.
///
/// Unmatched `end` statements and blocks left open are reported on stderr, not as errors.
///
/// # Errors
/// Returns `ASTError::NodeNotInArena` if one of `node_ids` does not resolve in the arena,
/// and forwards any error from `extract_end_statements`.
fn associate_end_lines_in_arena(&mut self, code_text: &str, node_ids: &[NodeId]) -> Result<(), ASTError> {
    let end_statements = AST::extract_end_statements(code_text)?;

    // Openers are pushed before the `end` events and the sort below is stable, so an
    // opener still precedes an `end` that shares its offset.
    let mut events: Vec<ParseEvent> = Vec::with_capacity(node_ids.len() + end_statements.len());
    for &node_id in node_ids {
        let data = self
            .arena
            .get(node_id)
            .ok_or_else(|| ASTError::NodeNotInArena(format!("NodeId {:?} not found", node_id)))?
            .get();
        if data.end_line.is_none() {
            events.push(ParseEvent::StartNodeId { node_id, index: data.start_index.unwrap_or(0) });
        }
    }
    events.extend(end_statements.into_iter().map(|end| ParseEvent::End {
        word: end.word,
        index: end.index,
        line: end.line,
        end_index: end.end_index,
    }));
    events.sort_by_key(|event| match event {
        ParseEvent::StartNodeId { index, .. } => *index,
        ParseEvent::End { index, .. } => *index,
    });

    let mut open_blocks: Vec<NodeId> = Vec::new();
    for event in events {
        match event {
            ParseEvent::StartNodeId { node_id, .. } => open_blocks.push(node_id),
            ParseEvent::End { word, line, end_index, .. } => {
                let closes = |node_id: &NodeId| -> bool {
                    if let Some(node) = self.arena.get(*node_id) {
                        let node_data = node.get();
                        let keyword_type = AST::get_end_keyword(&node_data.node_type);
                        if word.is_empty() {
                            keyword_type == Some("") || keyword_type == Some("name")
                        } else if UNNAMED_END_KEYWORDS.contains(&word.as_str()) {
                            keyword_type == Some(word.as_str())
                        } else {
                            node_data.name == word && keyword_type == Some("name")
                        }
                    } else {
                        false
                    }
                };
                // Search from the top of the stack so the innermost candidate wins.
                let position = open_blocks.iter().rposition(closes);
                match position {
                    Some(pos) => {
                        let closed_id = open_blocks.remove(pos);
                        let node = self.arena.get_mut(closed_id).ok_or_else(|| {
                            ASTError::NodeNotInArena(format!("NodeId {:?} not found during update", closed_id))
                        })?;
                        let node_data = node.get_mut();
                        node_data.end_line = Some(line);
                        node_data.end_index = Some(end_index);
                    }
                    None => eprintln!("Warning: Unmatched 'end {}' at line {}", word, line),
                }
            }
        }
    }
    if !open_blocks.is_empty() {
        eprintln!("Warning: Stack not empty after associating end lines. Mismatched blocks?");
    }
    Ok(())
}
/// Rebuilds the arena from `nodes_data` and nests the nodes according to their line ranges.
///
/// Steps:
/// 1. sort `nodes_data` by start offset and copy each entry into a fresh arena;
/// 2. record the new ids in `node_ids`, index-aligned with `nodes_data`;
/// 3. fill in missing end lines from the `end ...;` statements of `code_text`;
/// 4. walk the nodes in order, keeping a stack of open containers: a container is popped
///    once its end line lies before the current node's start line, and the current node
///    is appended under whatever remains on top (the root at the very least).
///
/// Only nodes whose end line was unknown at extraction time are pushed as containers.
///
/// # Errors
/// Returns `ASTError::NodeNotInArena` if a node id fails to resolve, and forwards errors
/// from the end-line association.
pub fn build(&mut self, code_text: &str) -> Result<(), ASTError> {
    self.nodes_data.sort_by_key(|n| n.start_index.unwrap_or(0));
    self.arena = Arena::new();
    self.root_id = self.arena.new_node(NodeData::new("root".to_string(), "RootNode".to_string(), None, None, false));
    let node_ids: Vec<NodeId> = self.nodes_data.iter()
        .map(|node_data| self.arena.new_node(node_data.clone()))
        .collect();
    // Keep the public id table in step with the sorted `nodes_data`.
    self.node_ids = node_ids.iter().copied().map(Some).collect();
    self.associate_end_lines_in_arena(code_text, &node_ids)?;

    let mut stack: Vec<NodeId> = vec![self.root_id];
    for (index, &current_node_id) in node_ids.iter().enumerate() {
        let current_start_line = self
            .arena
            .get(current_node_id)
            .ok_or_else(|| ASTError::NodeNotInArena(format!("NodeId {:?} not found", current_node_id)))?
            .get()
            .start_line
            .unwrap_or(0);

        // Drop containers that ended before this node starts; the root is never popped.
        while let Some(&parent_id) = stack.last() {
            if parent_id == self.root_id {
                break;
            }
            let parent_end_line = self
                .arena
                .get(parent_id)
                .ok_or_else(|| ASTError::NodeNotInArena(format!("Parent NodeId {:?} not found", parent_id)))?
                .get()
                .end_line;
            match parent_end_line {
                Some(end_line) if end_line < current_start_line => {
                    stack.pop();
                }
                _ => break,
            }
        }

        let parent_node_id = stack.last().copied().unwrap_or(self.root_id);
        parent_node_id.append(current_node_id, &mut self.arena);

        // The pre-association data tells block openers apart from single-statement nodes.
        if self.nodes_data[index].end_line.is_none() {
            stack.push(current_node_id);
        }
    }
    Ok(())
}
/// Prints the rendering produced by `output_tree` to stdout.
pub fn print_tree(&self) {
println!("{}", self.output_tree());
}
/// Renders the tree as text, one node per line.
///
/// Each line holds one space per level of depth, the node type in red, ` - `, the node
/// name in blue and, when both are set and neither is `usize::MAX`, the start and end lines.
///
/// # Panics
/// Panics if a descendant id does not resolve in the arena.
pub fn output_tree(&self) -> String {
    let mut rendered = String::new();
    for node_id in self.root_id.descendants(&self.arena) {
        // `ancestors` yields the node itself first, hence the `- 1`.
        let depth = node_id.ancestors(&self.arena).count() - 1;
        let data = self.arena.get(node_id).unwrap().get();
        rendered.push_str(&" ".repeat(depth));
        rendered.push_str(&format!(
            "{}{}{} - {}{}{}",
            text_format::RED,
            data.node_type,
            text_format::ENDC,
            text_format::BLUE,
            data.name,
            text_format::ENDC
        ));
        match (data.start_line, data.end_line) {
            (Some(start_line), Some(end_line)) if start_line != usize::MAX && end_line != usize::MAX => {
                rendered.push_str(&format!(" (start_line: {}, end_line: {})", start_line, end_line));
            }
            _ => {}
        }
        rendered.push('\n');
    }
    rendered
}
/// Prints the details of every node whose id is recorded in `node_ids`.
///
/// At most `nodes_data.len()` slots are visited and empty slots are skipped. Iterating the
/// slots directly avoids an out-of-bounds panic when the two public vectors differ in length.
///
/// # Errors
/// Returns `ASTError::NodeNotInArena` if a recorded id does not resolve in the arena.
pub fn print_nodes_info(&self) -> Result<(), ASTError> {
    for node_id in self.node_ids.iter().take(self.nodes_data.len()).flatten() {
        let node = self.arena.get(*node_id).ok_or_else(|| {
            ASTError::NodeNotInArena(format!("Node with ID {:?} not found in arena", node_id))
        })?;
        node.get().print_info();
    }
    Ok(())
}
/// Maps a node type tag to the word expected after `end` for that kind of block.
///
/// - `Some("name")`: closed by `end <name>;`
/// - `Some("loop" | "if" | "case" | "record")`: closed by `end <keyword>;`
/// - `Some("")`: closed by a bare `end;`
/// - `None`: the node type has no `end` statement of its own (including unknown tags).
pub fn get_end_keyword(node_type: &str) -> Option<&'static str> {
    match node_type {
        "PackageNode" | "ProcedureNode" | "FunctionNode" | "TaskNode" | "EntryNode" => Some("name"),
        "SimpleLoop" | "WhileLoop" | "ForLoop" => Some("loop"),
        "IfStatement" => Some("if"),
        "CaseStatement" => Some("case"),
        "TypeDeclaration" => Some("record"),
        "DeclareNode" => Some(""),
        // "ElsifStatement" and "ElseStatement" fall through here with every unknown tag.
        _ => None,
    }
}
/// Collects every `end [word];` statement of `code_text` in source order.
///
/// For each match the result records the optional word after `end` (empty when absent),
/// the byte offsets of the match start and end, and the 1-based line of the match start.
///
/// # Errors
/// Returns `ASTError::InvalidCapture` if a match does not expose its overall span.
pub fn extract_end_statements(code_text: &str) -> Result<Vec<EndStatement>, ASTError> {
    let mut ends = Vec::new();
    // Matches arrive in increasing offset order, so newlines are counted incrementally
    // instead of rescanning the text from the beginning for every match.
    let mut line = 1;
    let mut counted_up_to = 0;
    for cap in RE_END_STATEMENT.captures_iter(code_text) {
        let entire_match = cap.get(0).ok_or(ASTError::InvalidCapture)?;
        let index = entire_match.start();
        line += code_text[counted_up_to..index].matches('\n').count();
        counted_up_to = index;
        let word = cap.get(2).map_or("", |m| m.as_str()).to_string();
        ends.push(EndStatement { word, index, line, end_index: entire_match.end() });
    }
    Ok(ends)
}
/// Extracts package specifications and bodies from `code_text`.
///
/// Matches are processed grouped by the first component of the package name, undotted
/// names before dotted ones, then by position. A header followed by `;` before any `is`
/// is closed on its own start line. For a dotted name `A.B.C` the node is named `C` and a
/// chain of parent `PackageNode`s (`A`, `B`) is attached through `parent`; parents that do
/// not exist yet are synthesized with `end_line = usize::MAX` as a placeholder.
///
/// The result is sorted by start line.
///
/// # Errors
/// Returns `ASTError::RegexError` if matching fails, and `ASTError::MatchItemMissing` /
/// `ASTError::InvalidCapture` if a match lacks its span or a required group.
pub fn extract_packages(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    let mut nodes: Vec<NodeData> = Vec::new();
    let mut sorted_matches = RE_PACKAGE
        .captures_iter(code_text)
        .collect::<Result<Vec<_>, _>>()
        .map_err(|_| ASTError::RegexError)?;
    sorted_matches.sort_by(|a, b| {
        let a_name = a.name("name").map_or("", |m| m.as_str());
        let b_name = b.name("name").map_or("", |m| m.as_str());
        let a_base = a_name.split('.').next().unwrap_or("");
        let b_base = b_name.split('.').next().unwrap_or("");
        let a_start = a.get(0).map_or(0, |m| m.start());
        let b_start = b.get(0).map_or(0, |m| m.start());
        a_base
            .cmp(b_base)
            .then(a_name.contains('.').cmp(&b_name.contains('.')))
            .then(a_start.cmp(&b_start))
    });

    for mat in sorted_matches {
        let full_match = mat.get(0).ok_or(ASTError::MatchItemMissing)?;
        let category_keyword = mat.name("category").ok_or(ASTError::InvalidCapture)?;
        let start_index = category_keyword.start();
        let start_line = code_text[..start_index].matches('\n').count() + 1;
        let name = mat.name("name").ok_or(ASTError::InvalidCapture)?.as_str().to_string();
        let is_body = mat.name("body").is_some();
        let mut node = NodeData::new(name.clone(), "PackageNode".to_string(), Some(start_line), Some(start_index), is_body);

        // A `;` before any `is` means the header has no block to close later.
        if let Some(end_match) = RE_END_SEARCH.find(&code_text[full_match.end()..]) {
            if end_match.as_str().trim() == ";" {
                node.end_line = Some(start_line);
                node.end_index = Some(full_match.end() + end_match.end());
            }
        }

        if name.contains('.') {
            let parts: Vec<&str> = name.split('.').collect();
            let leaf = parts.len() - 1;
            let mut current_parent = None;
            for part in &parts[..leaf] {
                let parent_name = part.to_string();
                let parent_node = nodes.iter_mut().find(|n| n.name == parent_name);
                if let Some(parent) = parent_node {
                    // Pull an existing parent back so that it starts no later than this child.
                    if start_index < parent.start_index.unwrap_or(usize::MAX) && parent.end_line.is_some() {
                        parent.start_index = Some(start_index);
                        parent.start_line = Some(start_line);
                    }
                    current_parent = Some(parent.clone());
                } else {
                    let mut parent = NodeData::new(
                        parent_name.clone(),
                        "PackageNode".to_string(),
                        Some(start_line),
                        Some(start_index),
                        true,
                    );
                    parent.end_line = Some(usize::MAX);
                    if let Some(prev_parent) = current_parent.take() {
                        parent.parent = Some(Box::new(prev_parent));
                    }
                    nodes.push(parent.clone());
                    current_parent = Some(parent);
                }
            }
            node.name = parts[leaf].to_string();
            node.parent = current_parent.map(Box::new);
        }
        nodes.push(node);
    }
    nodes.sort_by(|a, b| a.start_line.unwrap_or(usize::MAX).cmp(&b.start_line.unwrap_or(usize::MAX)));
    Ok(nodes)
}
/// Extracts procedure and function declarations and bodies from `code_text`.
///
/// A header counts as a body when the first `is` / `;` after it is an `is` that is not
/// followed by the word `new`. Everything else is a specification and is closed at the
/// next `;`. Parameters are parsed by `AST::parse_parameters`. Dotted names get a chain
/// of parent `PackageNode`s exactly as in `extract_packages`.
///
/// # Errors
/// Returns `ASTError::RegexError` if matching fails, and `ASTError::MatchItemMissing` /
/// `ASTError::InvalidCapture` if a match lacks its span or a required group.
pub fn extract_procedures_functions(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    let mut nodes: Vec<NodeData> = Vec::new();
    for cap_res in RE_FUNC_PROC.captures_iter(code_text) {
        let mat = cap_res.map_err(|_| ASTError::RegexError)?;
        let full_match = mat.get(0).ok_or(ASTError::MatchItemMissing)?;
        let category_keyword = mat.name("category").ok_or(ASTError::InvalidCapture)?;
        let start_index = category_keyword.start();
        let start_line = code_text[..start_index].matches('\n').count() + 1;
        let category = category_keyword.as_str().to_lowercase();
        let name = mat.name("name").ok_or(ASTError::InvalidCapture)?.as_str().to_string();
        let search_text = &code_text[full_match.end()..];

        let mut is_body = false;
        if let Some(end_match) = RE_END_IS_OR_SEMI.find(search_text) {
            if end_match.as_str().trim() == "is" {
                let after_is = search_text[end_match.end()..].trim_start();
                // `is new ...` is an instantiation, not a body. The keyword is matched
                // case-insensitively and as a whole word, so `is NEW Foo` is recognised
                // and an identifier such as `Newline` is not mistaken for it.
                let starts_with_new = after_is.get(..3).map_or(false, |word| word.eq_ignore_ascii_case("new"))
                    && !after_is[3..].starts_with(|c: char| c.is_alphanumeric() || c == '_');
                is_body = !starts_with_new;
            }
        }

        let node_type = if category == "function" { "FunctionNode" } else { "ProcedureNode" };
        let mut node = NodeData::new(name.clone(), node_type.to_string(), Some(start_line), Some(start_index), is_body);
        node.arguments = Some(AST::parse_parameters(mat.name("params").map(|m| m.as_str())));

        if !is_body {
            // A specification ends at the first `;` after the header.
            if let Some(semicolon_match) = RE_SEMICOLON.find(search_text) {
                let end_pos = full_match.end() + semicolon_match.end();
                node.end_line = Some(code_text[..end_pos].matches('\n').count() + 1);
                node.end_index = Some(end_pos);
            }
        }

        if name.contains('.') {
            let parts: Vec<&str> = name.split('.').collect();
            let leaf = parts.len() - 1;
            let mut current_parent = None;
            for part in &parts[..leaf] {
                let parent_name = part.to_string();
                let parent_node = nodes.iter_mut().find(|n| n.name == parent_name);
                if let Some(parent) = parent_node {
                    // Pull an existing parent back so that it starts no later than this child.
                    if start_index < parent.start_index.unwrap_or(usize::MAX) && parent.end_line.is_some() {
                        parent.start_index = Some(start_index);
                        parent.start_line = Some(start_line);
                    }
                    current_parent = Some(parent.clone());
                } else {
                    let mut parent = NodeData::new(
                        parent_name.clone(),
                        "PackageNode".to_string(),
                        Some(start_line),
                        Some(start_index),
                        true,
                    );
                    parent.end_line = Some(usize::MAX);
                    if let Some(prev_parent) = current_parent.take() {
                        parent.parent = Some(Box::new(prev_parent));
                    }
                    nodes.push(parent.clone());
                    current_parent = Some(parent);
                }
            }
            node.name = parts[leaf].to_string();
            node.parent = current_parent.map(Box::new);
        }
        nodes.push(node);
    }
    Ok(nodes)
}
/// Extracts type-related declarations from `code_text` as `TypeDeclaration` nodes.
///
/// Three passes are made, and their results are appended in this order:
/// 1. `type` / `subtype` declarations. `type_kind` is the lower-cased kind word, or
///    "tuple" for a parenthesised list, or "derived" for `is new <base>`. Record types are
///    left open so that a later `end record;` can close them; all others end at the next `;`.
/// 2. `for <name> use record` representation clauses (left open as well).
/// 3. `for <name> use at <address>;` clauses, whose address text is stored in `base_type`.
///
/// `column` is the byte column of the declaration keyword within its line.
///
/// # Errors
/// Returns `ASTError::RegexError` if matching fails, and `ASTError::MatchItemMissing` /
/// `ASTError::InvalidCapture` if a match lacks its span or a required group.
pub fn extract_type_declarations(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    // 1-based line number of a byte offset.
    let line_at = |offset: usize| code_text[..offset].matches('\n').count() + 1;
    // Byte distance between `offset` and the start of its line.
    let column_at = |offset: usize| offset - code_text[..offset].rfind('\n').map_or(0, |i| i + 1);
    let mut nodes: Vec<NodeData> = Vec::new();

    for cap_res in RE_TYPE_SUBTYPE.captures_iter(code_text) {
        let caps = cap_res.map_err(|_| ASTError::RegexError)?;
        let full_match = caps.get(0).ok_or(ASTError::MatchItemMissing)?;
        let category_match = caps.name("category").ok_or(ASTError::InvalidCapture)?;
        let start_index = full_match.start();
        let start_line = line_at(start_index);
        // The match itself begins at the start of the line, so the column has to be
        // measured from the keyword; measuring from the match start always gives 0.
        let column = column_at(category_match.start());
        let category = category_match.as_str().to_lowercase();
        let name = caps.name("name").ok_or(ASTError::InvalidCapture)?.as_str().trim().to_string();
        let type_kind = caps.name("type_kind").map(|m| m.as_str().trim())
            .or_else(|| caps.name("tuple_type").map(|_| "tuple"))
            .or_else(|| caps.name("base_type").map(|_| "derived"))
            .unwrap_or("")
            .to_lowercase();
        let tuple_values = caps.name("tuple_type").map(|tuple_match| {
            tuple_match
                .as_str()
                .trim_matches(|c| c == '(' || c == ')')
                .split(',')
                .map(|s| s.trim().to_string())
                .filter(|s| !s.is_empty())
                .collect::<Vec<String>>()
        });
        let end_pos = if type_kind == "record" {
            None
        } else {
            code_text[full_match.end()..].find(';').map(|p| full_match.end() + p + 1)
        };

        let mut node = NodeData::new(name, "TypeDeclaration".to_string(), Some(start_line), Some(start_index), false);
        node.column = Some(column);
        node.end_line = end_pos.map(|pos| line_at(pos));
        node.end_index = end_pos;
        node.category_type = Some(category);
        node.type_kind = Some(type_kind);
        node.base_type = caps.name("base_type").map(|m| m.as_str().to_lowercase());
        node.tuple_values = tuple_values;
        nodes.push(node);
    }

    for cap_res in RE_REPR_CLAUSE.captures_iter(code_text) {
        let caps = cap_res.map_err(|_| ASTError::RegexError)?;
        let category_keyword = caps.name("category").ok_or(ASTError::InvalidCapture)?;
        let start_index = category_keyword.start();
        let mut node = NodeData::new(
            caps.name("name").ok_or(ASTError::InvalidCapture)?.as_str().to_string(),
            "TypeDeclaration".to_string(),
            Some(line_at(start_index)),
            Some(start_index),
            false,
        );
        node.column = Some(column_at(start_index));
        node.category_type = Some("for".to_string());
        node.type_kind = Some("record".to_string());
        nodes.push(node);
    }

    for cap_res in RE_REPR_AT.captures_iter(code_text) {
        let caps = cap_res.map_err(|_| ASTError::RegexError)?;
        let full_match = caps.get(0).ok_or(ASTError::MatchItemMissing)?;
        let category_keyword = caps.name("category").ok_or(ASTError::InvalidCapture)?;
        let start_index = category_keyword.start();
        let name = caps.name("name").ok_or(ASTError::InvalidCapture)?.as_str().to_string();
        let address = caps.name("address").ok_or(ASTError::InvalidCapture)?.as_str().trim().to_string();
        let mut node = NodeData::new(name, "TypeDeclaration".to_string(), Some(line_at(start_index)), Some(start_index), false);
        node.column = Some(column_at(start_index));
        node.category_type = Some("for".to_string());
        node.type_kind = Some("at_clause".to_string());
        node.base_type = Some(address);
        node.end_line = Some(line_at(full_match.end()));
        node.end_index = Some(full_match.end());
        nodes.push(node);
    }
    Ok(nodes)
}
/// Finds every `declare` keyword that opens a line and returns one `DeclareNode`
/// (named "DeclareBlock", flagged as a body) per occurrence, positioned at the keyword.
///
/// # Errors
/// Returns `ASTError::RegexError` if matching fails and `ASTError::InvalidCapture` if the
/// keyword group is missing from a match.
pub fn extract_declare_blocks(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    RE_DECLARE
        .captures_iter(code_text)
        .map(|cap_res| -> Result<NodeData, ASTError> {
            let caps = cap_res.map_err(|_| ASTError::RegexError)?;
            let keyword = caps.name("declare").ok_or(ASTError::InvalidCapture)?;
            let offset = keyword.start();
            let line = code_text[..offset].matches('\n').count() + 1;
            Ok(NodeData::new(
                String::from("DeclareBlock"),
                String::from("DeclareNode"),
                Some(line),
                Some(offset),
                true,
            ))
        })
        .collect()
}
/// Gathers the loop nodes of `code_text`: simple loops first, then `while` loops, then
/// `for` loops. The first extractor that fails aborts the whole call with its error.
pub fn extract_control_flow_nodes(code_text: &str) -> Result<Vec<NodeData>,ASTError> {
    let mut collected: Vec<NodeData> = AST::extract_simple_loops(code_text)?;
    collected.extend(AST::extract_while_loops(code_text)?);
    collected.extend(AST::extract_for_loops(code_text)?);
    Ok(collected)
}
/// Finds every `loop` keyword that opens a line and returns one `SimpleLoop` node per
/// occurrence. The node starts at the keyword; `body_start` is the offset just past it.
///
/// # Errors
/// Returns `ASTError::InvalidCapture` if a match lacks its span or the keyword group.
pub fn extract_simple_loops(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    RE_SIMPLE_LOOP
        .captures_iter(code_text)
        .map(|caps| -> Result<NodeData, ASTError> {
            let header = caps.get(0).ok_or(ASTError::InvalidCapture)?;
            let keyword = caps.name("Captureloop").ok_or(ASTError::InvalidCapture)?;
            let offset = keyword.start();
            let line = code_text[..offset].matches('\n').count() + 1;
            let mut node = NodeData::new(
                String::from("SimpleLoop"),
                String::from("SimpleLoop"),
                Some(line),
                Some(offset),
                false,
            );
            node.body_start = Some(header.end());
            Ok(node)
        })
        .collect()
}
/// Attaches an exit condition to every `SimpleLoop` node already placed in the tree.
///
/// For each such node with both `body_start` and `end_index` set, the text between the two
/// offsets is searched for `exit when <condition>`; the last occurrence wins and, when its
/// condition is not empty, it is parsed and stored in `conditions`.
///
/// Loops whose offsets are empty, reversed, or do not address a valid slice of `code_text`
/// are skipped rather than causing a panic.
///
/// # Errors
/// Returns `ASTError::NodeNotInArena` if a descendant id does not resolve in the arena.
pub fn populate_simple_loop_conditions(&mut self, code_text: &str) -> Result<(), ASTError> {
    // Collected first and applied afterwards: the traversal borrows the arena immutably.
    let mut updates: Vec<(NodeId, ConditionExpr)> = Vec::new();
    for node_id in self.root_id.descendants(&self.arena) {
        let node = self.arena.get(node_id)
            .ok_or_else(|| ASTError::NodeNotInArena(format!("NodeId {:?} not found", node_id)))?
            .get();
        if node.node_type != "SimpleLoop" {
            continue;
        }
        let (body_start, end_index) = match (node.body_start, node.end_index) {
            (Some(start), Some(end)) if start < end => (start, end),
            _ => continue,
        };
        // `get` yields `None` for offsets that are out of range or split a character.
        let body_text = match code_text.get(body_start..end_index) {
            Some(text) => text,
            None => continue,
        };
        if let Some(cond_match) = RE_EXIT_WHEN.captures_iter(body_text).last() {
            let cond_string = cond_match.name("exitcond1")
                .map_or("", |m| m.as_str())
                .trim()
                .to_string();
            if !cond_string.is_empty() {
                updates.push((node_id, AST::parse_condition_expression(&cond_string)));
            }
        }
    }
    for (node_id, conditions) in updates {
        if let Some(node) = self.arena.get_mut(node_id) {
            node.get_mut().conditions = Some(conditions);
        }
    }
    Ok(())
}
/// Finds every `while <condition> loop` header and returns one `WhileLoop` node per match.
///
/// The node starts at the `while` keyword, carries the parsed condition, and records the
/// offset just past the matched header in `body_start`.
///
/// # Errors
/// Returns `ASTError::InvalidCapture` if a match lacks its span or the keyword group.
pub fn extract_while_loops(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    RE_WHILE_LOOP
        .captures_iter(code_text)
        .map(|caps| -> Result<NodeData, ASTError> {
            let header = caps.get(0).ok_or(ASTError::InvalidCapture)?;
            let keyword = caps.name("Capturewhile").ok_or(ASTError::InvalidCapture)?;
            let offset = keyword.start();
            let line = code_text[..offset].matches('\n').count() + 1;
            let condition_text = caps.name("exitcond2").unwrap().as_str().to_string();
            let mut node = NodeData::new(
                String::from("WhileLoop"),
                String::from("WhileLoop"),
                Some(line),
                Some(offset),
                false,
            );
            node.conditions = Some(AST::parse_condition_expression(&condition_text));
            node.body_start = Some(header.end());
            Ok(node)
        })
        .collect()
}
/// Builds one `ForLoop` node for every `RE_FOR_LOOP` match in `code_text`.
///
/// Named groups are mapped onto the node as follows:
/// * `Capturefor` gives the start line (1-based) and start index (byte offset);
/// * `index` gives `iterator`;
/// * `loop_direction` gives `direction`, defaulting to `"to"` when absent;
/// * `range_start` is `frst` when present, otherwise `primavar` if `range_end` matched;
/// * `range_end` is `scnd` when present, otherwise the `range_end` group;
/// * `range_var` is `primavar` when neither `frst` nor `range_end` matched;
/// * `iterator_type` is `primavar` when `frst` matched.
///
/// `body_start` is the byte offset just past the whole match.
///
/// # Errors
/// * `ASTError::RegexError` when the regex engine fails on a match attempt.
/// * `ASTError::MatchItemMissing` when the whole match is unavailable.
/// * `ASTError::InvalidCapture` when `Capturefor` or `index` is absent
///   (previously a missing `index` group panicked).
pub fn extract_for_loops(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    let mut nodes = Vec::new();
    for cap_res in RE_FOR_LOOP.captures_iter(code_text) {
        let mat = cap_res.map_err(|_| ASTError::RegexError)?;
        let full_match = mat.get(0).ok_or(ASTError::MatchItemMissing)?;
        let for_keyword = mat.name("Capturefor").ok_or(ASTError::InvalidCapture)?;
        let start_index = for_keyword.start();
        let start_line = code_text[..start_index].matches('\n').count() + 1;
        let iterator = mat
            .name("index")
            .ok_or(ASTError::InvalidCapture)?
            .as_str()
            .to_string();
        let direction = mat
            .name("loop_direction")
            .map_or_else(|| "to".to_string(), |m| m.as_str().to_string());

        let frst = mat.name("frst");
        let scnd = mat.name("scnd");
        let range_end_group = mat.name("range_end");
        let primavar = mat.name("primavar");

        let range_start = match (frst, range_end_group) {
            (Some(first), _) => Some(first),
            (None, Some(_)) => primavar,
            (None, None) => None,
        }
        .map(|m| m.as_str().trim().to_string());
        let range_end = scnd
            .or(range_end_group)
            .map(|m| m.as_str().trim().to_string());
        let range_var = if range_end_group.is_none() && frst.is_none() {
            primavar.map(|m| m.as_str().trim().to_string())
        } else {
            None
        };
        let iterator_type = frst
            .and(primavar)
            .map(|m| m.as_str().trim().to_string());

        let mut node = NodeData::new(
            "ForLoop".to_string(),
            "ForLoop".to_string(),
            Some(start_line),
            Some(start_index),
            false,
        );
        node.iterator = Some(iterator);
        node.range_start = range_start;
        node.range_end = range_end;
        node.direction = Some(direction);
        node.iterator_type = iterator_type;
        node.range_var = range_var;
        node.body_start = Some(full_match.end());
        nodes.push(node);
    }
    Ok(nodes)
}
/// Collects the statement-level nodes of `code_text` into one vector.
///
/// The result holds, in this order, the nodes produced by the `if`, `case`,
/// `elsif` and `else` extractors. The first extractor error is returned as is.
pub fn extract_statement_nodes(code_text: &str) -> Result<Vec<NodeData>,ASTError> {
    let mut statements: Vec<NodeData> = AST::extract_if_statements(code_text)?;
    statements.extend(AST::extract_case_statements(code_text)?);
    statements.extend(AST::extract_elsif_statements(code_text)?);
    statements.extend(AST::extract_else_statements(code_text)?);
    Ok(statements)
}
/// Builds one `IfStatement` node for every `RE_IF` match in `code_text`.
///
/// The start line (1-based) and start index (byte offset) come from the `ifstat`
/// group, `conditions` are parsed from the `Condition` group, and `body_start` is
/// the byte offset just past the whole match.
///
/// # Errors
/// * `ASTError::RegexError` when the regex engine fails on a match attempt.
/// * `ASTError::MatchItemMissing` when the whole match is unavailable.
/// * `ASTError::InvalidCapture` when `ifstat` or `Condition` is absent
///   (previously a missing `Condition` group panicked).
pub fn extract_if_statements(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    let mut nodes = Vec::new();
    for mat in RE_IF.captures_iter(code_text) {
        let captures = mat.map_err(|_| ASTError::RegexError)?;
        let full_match = captures.get(0).ok_or(ASTError::MatchItemMissing)?;
        let if_keyword = captures.name("ifstat").ok_or(ASTError::InvalidCapture)?;
        let start_index = if_keyword.start();
        let start_line = code_text[..start_index].matches('\n').count() + 1;
        let condition = captures.name("Condition").ok_or(ASTError::InvalidCapture)?;
        let mut node = NodeData::new(
            "IfStatement".to_string(),
            "IfStatement".to_string(),
            Some(start_line),
            Some(start_index),
            false,
        );
        node.conditions = Some(AST::parse_condition_expression(condition.as_str()));
        node.body_start = Some(full_match.end());
        nodes.push(node);
    }
    Ok(nodes)
}
/// Builds one `CaseStatement` node for every `RE_CASE` match in `code_text`.
///
/// The start line (1-based) and start index (byte offset) come from the
/// `Casestmnt` group, `switch_expression` is the trimmed `var` group, and
/// `body_start` is the byte offset just past the whole match.
///
/// # Errors
/// * `ASTError::RegexError` when the regex engine fails on a match attempt.
/// * `ASTError::InvalidCapture` when `Casestmnt` or `var` is absent
///   (previously a missing `var` group panicked).
/// * `ASTError::MatchItemMissing` when the whole match is unavailable.
pub fn extract_case_statements(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    let mut nodes = Vec::new();
    for mat in RE_CASE.captures_iter(code_text) {
        let captures = mat.map_err(|_| ASTError::RegexError)?;
        let case_keyword = captures.name("Casestmnt").ok_or(ASTError::InvalidCapture)?;
        let full_match = captures.get(0).ok_or(ASTError::MatchItemMissing)?;
        let start_index = case_keyword.start();
        let start_line = code_text[..start_index].matches('\n').count() + 1;
        let switch_expression = captures
            .name("var")
            .ok_or(ASTError::InvalidCapture)?
            .as_str()
            .trim()
            .to_string();
        let mut node = NodeData::new(
            "CaseStatement".to_string(),
            "CaseStatement".to_string(),
            Some(start_line),
            Some(start_index),
            false,
        );
        node.switch_expression = Some(switch_expression);
        node.body_start = Some(full_match.end());
        nodes.push(node);
    }
    Ok(nodes)
}
pub fn populate_cases(&mut self, code_text: &str) -> Result<(), ASTError> {
let re_when = &RE_WHEN;
let updates: Vec<_> = self.arena.iter()
.filter_map(|nodo| {
let node_id = self.arena.get_node_id(&nodo)?;
let node = self.arena.get(node_id)?;
if node.get().node_type == "CaseStatement" {
if let (Some(body_start), Some(end_index)) = (node.get().body_start, node.get().end_index) {
if body_start >= end_index { return None; }
let body_text = &code_text[body_start..end_index];
let cases = AST::extract_cases_from_body(body_text, &re_when);
Some((node_id, cases))
} else {
None
}
} else {
None
}
})
.collect();
for (node_id, cases) in updates {
let mut node_data = self.arena[node_id].get().clone();
node_data.cases = Some(cases);
if let Some(node) = self.arena.get_mut(node_id) {
*node.get_mut() = node_data;
} else {
return Err(ASTError::NodeNotInArena(format!("NodeId {:?} not found during case update", node_id)));
}
}
Ok(())
}
/// Returns the trimmed `caso` group of the `re_when` matches found in `body_text`.
///
/// Every match exposes a `spaces` group. Whenever a match also has the
/// `doublecase` group, the byte length of its `spaces` group becomes the reference
/// width. While the reference width is zero every match is kept; afterwards only
/// matches whose `spaces` length equals it are kept.
/// NOTE(review): this looks like a way to skip `when` branches of nested `case`
/// statements by indentation — confirm against the definition of `RE_WHEN`.
///
/// Match attempts on which the regex engine reports an error are skipped.
///
/// # Panics
/// Panics if a match lacks the `spaces` group, or lacks the `caso` group when it
/// is about to be recorded.
pub fn extract_cases_from_body(body_text: &str, re_when: &Reg) -> Vec<String> {
    let mut choices = Vec::new();
    let mut reference_width = 0;
    for when_match in re_when.captures_iter(body_text).flatten() {
        let indent_width = when_match.name("spaces").unwrap().as_str().len();
        if when_match.name("doublecase").is_some() {
            reference_width = indent_width;
        }
        if reference_width == 0 || reference_width == indent_width {
            choices.push(when_match.name("caso").unwrap().as_str().trim().to_string());
        }
    }
    choices
}
/// Parses a parameter list such as `A, B : in out Integer := 0; C : Float`.
///
/// Groups are separated by `;`. Inside a group the text before the first `:` is a
/// comma-separated list of names and the text after it is
/// `[mode] type [:= default]`. One `ArgumentData` is produced per name; groups
/// without a `:` and empty names are skipped.
///
/// The mode is `"in out"`, `"out"` or `"in"` (also the default when no mode is
/// written). A mode keyword is only recognised as a whole word, compared
/// ASCII-case-insensitively, so a type such as `integer` or `output_t` is no longer
/// mistaken for a mode followed by a truncated type name. The default value is
/// split on `:=` whether or not it is surrounded by spaces.
///
/// Returns an empty vector for `None` or blank input.
pub fn parse_parameters(params_opt: Option<&str>) -> Vec<ArgumentData> {
    /// Strips `keyword` from the front of `spec` when it is a whole word there,
    /// returning the remainder with leading whitespace removed.
    fn strip_mode_keyword<'a>(spec: &'a str, keyword: &str) -> Option<&'a str> {
        let head = spec.get(..keyword.len())?;
        if !head.eq_ignore_ascii_case(keyword) {
            return None;
        }
        let rest = &spec[keyword.len()..];
        if rest.is_empty() || rest.starts_with(char::is_whitespace) {
            Some(rest.trim_start())
        } else {
            None
        }
    }

    /// Splits `spec` into its (normalised) mode and the remaining type text.
    fn split_mode(spec: &str) -> (&'static str, &str) {
        if let Some(after_in) = strip_mode_keyword(spec, "in") {
            match strip_mode_keyword(after_in, "out") {
                Some(after_out) => ("in out", after_out),
                None => ("in", after_in),
            }
        } else if let Some(after_out) = strip_mode_keyword(spec, "out") {
            ("out", after_out)
        } else {
            ("in", spec)
        }
    }

    let params_str = match params_opt {
        Some(s) => s.trim(),
        None => return Vec::new(),
    };
    let mut all_args = Vec::new();
    for group in params_str.split(';') {
        let mut halves = group.splitn(2, ':').map(str::trim);
        let (names_part, type_part) = match (halves.next(), halves.next()) {
            (Some(names), Some(spec)) => (names, spec),
            // No ':' in this group (this also covers empty groups).
            _ => continue,
        };
        let (spec_part, default_value) = match type_part.split_once(":=") {
            Some((spec, default)) => (spec.trim(), Some(default.trim().to_string())),
            None => (type_part, None),
        };
        let (mode, data_type) = split_mode(spec_part);
        for name in names_part.split(',').map(str::trim).filter(|n| !n.is_empty()) {
            all_args.push(ArgumentData {
                name: name.to_string(),
                mode: mode.to_string(),
                data_type: data_type.to_string(),
                default_value: default_value.clone(),
            });
        }
    }
    all_args
}
pub fn extract_tasks(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
let task_pattern = &RE_TASK;
let mut nodes: Vec<NodeData> = Vec::new();
for cap_res in task_pattern.captures_iter(code_text) {
let mat = cap_res.map_err(|_| ASTError::RegexError)?;
let full_match = mat.get(0).ok_or(ASTError::MatchItemMissing)?;
let category_keyword = mat.name("category").ok_or(ASTError::InvalidCapture)?;
let start_line = code_text[..category_keyword.start()].matches('\n').count() + 1;
let start_index = category_keyword.start();
let name = mat.name("name").unwrap().as_str().to_string();
let is_body = mat.name("body").is_some();
let search_text = &code_text[full_match.end()..];
let end_search = Regex::new(r"^\s*;").unwrap().find(search_text);
let mut node = NodeData::new(
name,
"TaskNode".to_string(),
Some(start_line),
Some(start_index),
is_body
);
if let Some(end_match) = end_search {
node.end_line = Some(start_line);
node.end_index = Some(full_match.end() + end_match.end());
}
nodes.push(node);
}
Ok(nodes)
}
/// Builds one `EntryNode` for every `RE_ENTRY` match in `code_text`.
///
/// The node is named after the `name` group and starts at the `category` group
/// (1-based line, byte-offset index). `arguments` are parsed from the optional
/// `params` group and `conditions` from a non-empty `condition` group.
///
/// The text after the match is searched once with `RE_END_IS_OR_SEMI`:
/// * first hit `is`: the node is flagged as a body;
/// * first hit `;`: `end_index` is set just past the `;` and `end_line` is set to
///   the start line.
///
/// # Errors
/// * `ASTError::RegexError` when the regex engine fails on a match attempt.
/// * `ASTError::MatchItemMissing` when the whole match is unavailable.
/// * `ASTError::InvalidCapture` when `category` or `name` is absent
///   (previously a missing `name` group panicked).
pub fn extract_entries(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    let mut nodes: Vec<NodeData> = Vec::new();
    for cap_res in RE_ENTRY.captures_iter(code_text) {
        let mat = cap_res.map_err(|_| ASTError::RegexError)?;
        let full_match = mat.get(0).ok_or(ASTError::MatchItemMissing)?;
        let category_keyword = mat.name("category").ok_or(ASTError::InvalidCapture)?;
        let start_index = category_keyword.start();
        let start_line = code_text[..start_index].matches('\n').count() + 1;
        let name = mat
            .name("name")
            .ok_or(ASTError::InvalidCapture)?
            .as_str()
            .to_string();

        // One search decides both whether this is a body and where a declaration ends.
        let tail = &code_text[full_match.end()..];
        let (is_body, declaration_end) = match RE_END_IS_OR_SEMI.find(tail) {
            Some(hit) if hit.as_str().trim() == "is" => (true, None),
            Some(hit) if hit.as_str().trim() == ";" => (false, Some(full_match.end() + hit.end())),
            _ => (false, None),
        };

        let mut node = NodeData::new(
            name,
            "EntryNode".to_string(),
            Some(start_line),
            Some(start_index),
            is_body,
        );
        node.arguments = Some(AST::parse_parameters(mat.name("params").map(|m| m.as_str())));
        if let Some(condition_match) = mat.name("condition") {
            let condition_text = condition_match.as_str();
            if !condition_text.is_empty() {
                node.conditions = Some(AST::parse_condition_expression(condition_text));
            }
        }
        if let Some(end_index) = declaration_end {
            node.end_line = Some(start_line);
            node.end_index = Some(end_index);
        }
        nodes.push(node);
    }
    Ok(nodes)
}
/// Builds one `VariableDeclaration` node per declared name for every
/// `RE_VAR_DECL` match that has a `names_list` group.
///
/// The names are the comma-separated, trimmed, non-empty items of `names_list`.
/// All nodes created from one match share the same data:
/// * start line and start index are those of the `names_list` group;
/// * end line and end index are those of the end of the whole match;
/// * `base_type` is the trimmed `data_type` group (empty when absent);
/// * `conditions` is the parsed `default_value` group, when present.
///
/// # Errors
/// * `ASTError::RegexError` when the regex engine fails on a match attempt.
/// * `ASTError::MatchItemMissing` when the whole match is unavailable.
pub fn extract_variable_declarations(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    let mut nodes = Vec::new();
    for cap_res in RE_VAR_DECL.captures_iter(code_text) {
        let caps = cap_res.map_err(|_| ASTError::RegexError)?;
        let names = match caps.name("names_list") {
            Some(found) => found,
            None => continue,
        };
        let whole = caps.get(0).ok_or(ASTError::MatchItemMissing)?;
        let declared_type = caps
            .name("data_type")
            .map_or("", |m| m.as_str())
            .trim()
            .to_string();
        let initial_value = caps
            .name("default_value")
            .map(|m| AST::parse_condition_expression(m.as_str()));
        let first_index = names.start();
        let first_line = code_text[..first_index].matches('\n').count() + 1;
        let last_index = whole.end();
        let last_line = code_text[..last_index].matches('\n').count() + 1;

        let identifiers = names
            .as_str()
            .split(',')
            .map(str::trim)
            .filter(|ident| !ident.is_empty());
        for ident in identifiers {
            let mut node = NodeData::new(
                ident.to_string(),
                "VariableDeclaration".to_string(),
                Some(first_line),
                Some(first_index),
                false,
            );
            node.end_line = Some(last_line);
            node.end_index = Some(last_index);
            node.base_type = Some(declared_type.clone());
            node.conditions = initial_value.clone();
            nodes.push(node);
        }
    }
    Ok(nodes)
}
/// Parses `condition_str` into a `ConditionExpr`.
///
/// `albero` holds the root expression returned by `AST::supersplitter`, and `list`
/// holds every expression that the splitter pushed while building that tree.
pub fn parse_condition_expression(condition_str: &str) -> ConditionExpr {
    let mut collected = Vec::new();
    let tree = AST::supersplitter(condition_str.to_string(), &mut collected);
    ConditionExpr {
        list: collected,
        albero: Some(Box::new(tree)),
    }
}
/// Tells whether `keyword`, placed at byte offset `index`, would end strictly
/// before the end of `condstring`, i.e. at least one byte follows it.
fn size_checker(keyword: &str, condstring: &str, index: usize) -> bool {
    let keyword_end = keyword.len() + index;
    keyword_end < condstring.len()
}
/// Tells whether `keyword` occurs at byte offset `index` of `condstring` as a
/// stand-alone word.
///
/// The keyword must be followed by a space or a newline and, unless it sits at
/// offset 0, preceded by one as well.
///
/// The comparison is done on bytes, so the function returns `false` instead of
/// panicking when `index` is out of range, falls inside a multi-byte character,
/// or when the keyword is the very last thing in the string.
fn keyword_checker(keyword: &str, condstring: &str, index: usize) -> bool {
    fn is_delimiter(byte: u8) -> bool {
        byte == b' ' || byte == b'\n'
    }

    let bytes = condstring.as_bytes();
    let keyword_end = index + keyword.len();
    if bytes.get(index..keyword_end) != Some(keyword.as_bytes()) {
        return false;
    }
    let followed_by_delimiter = bytes.get(keyword_end).map_or(false, |&b| is_delimiter(b));
    // `index - 1` is in range here: the slice lookup above succeeded and `index > 0`.
    let preceded_by_delimiter = index == 0 || is_delimiter(bytes[index - 1]);
    followed_by_delimiter && preceded_by_delimiter
}
/// Identifies the operator that starts at byte offset `index` of `condstring`.
///
/// Returns the operator text together with a priority flag:
/// * `0` for relational and membership operators
///   (`<`, `>`, `<=`, `>=`, `/=`, `=`, `in`, `not in`);
/// * `1` for logical operators (`and then`, `and`, `or else`, `or`, `xor`, `not`);
/// * `("No_keyword", 2)` when no operator starts at `index`.
///
/// Symbol operators are tested before word operators. Every operator except `=`
/// is only reported when at least one byte follows it (see `AST::size_checker`),
/// and word operators must also pass `AST::keyword_checker`. `=` is reported only
/// when it is not the second character of `/=`, `<=` or `>=`.
///
/// # Panics
/// Panics if `index` is not a valid byte offset into `condstring`.
fn keyword_identifier(condstring: &str, index: usize) -> (&'static str, i32) {
    // Longer spellings come before their prefixes ("and then" before "and", ...).
    const WORD_OPERATORS: [(&str, i32); 8] = [
        ("in", 0),
        ("not in", 0),
        ("and then", 1),
        ("and", 1),
        ("or else", 1),
        ("or", 1),
        ("xor", 1),
        ("not", 1),
    ];

    let bytes = condstring.as_bytes();
    let current = bytes[index];
    let next = bytes.get(index + 1).copied();
    // Two-character symbols need one more byte after them, like every other operator.
    let two_char_fits = AST::size_checker("<=", condstring, index);

    let symbol = match (current, next) {
        (b'<', Some(after)) if after != b'=' => Some("<"),
        (b'>', Some(after)) if after != b'=' => Some(">"),
        (b'<', Some(b'=')) if two_char_fits => Some("<="),
        (b'>', Some(b'=')) if two_char_fits => Some(">="),
        (b'/', Some(b'=')) if two_char_fits => Some("/="),
        (b'=', _) if index == 0 || ![b'/', b'<', b'>'].contains(&bytes[index - 1]) => Some("="),
        _ => None,
    };
    if let Some(operator) = symbol {
        return (operator, 0);
    }

    for &(word, priority) in WORD_OPERATORS.iter() {
        if AST::size_checker(word, condstring, index) && AST::keyword_checker(word, condstring, index) {
            return (word, priority);
        }
    }
    ("No_keyword", 2)
}
pub fn recursive_function(keyword_str: &str, condstring: String, index: usize, lst: &mut Vec<Expression>) -> Expression {
let keyword = keyword_str.to_string(); if let Some(un_keyword) = match keyword.as_str() {
"not" => Some(Unaries::NOT),
_ => None,
} {
let split2 = condstring[index + keyword.len()..].trim().to_string();
let operand_expr = AST::supersplitter(split2.clone(), lst); let unary_expr = UnaryExpression {
op: un_keyword,
operand: Box::new(operand_expr.clone()),
condstring: condstring.clone(),
};
lst.push(Expression::Unary(unary_expr.clone())); return Expression::Unary(unary_expr);
} else if let Some(bin_keyword) = match keyword.as_str() {
"and" => Some(Binaries::AND),
"or" => Some(Binaries::OR),
"and then" => Some(Binaries::AND_THEN),
"or else" => Some(Binaries::OR_ELSE),
"xor" => Some(Binaries::XOR),
"<" => Some(Binaries::INFERIOR),
">" => Some(Binaries::SUPERIOR),
"<=" => Some(Binaries::INFERIOR_OR_EQUAL),
">=" => Some(Binaries::SUPERIOR_OR_EQUAL),
"=" => Some(Binaries::EQUAL),
"/=" => Some(Binaries::UNEQUAL),
_ => None,
} {
let split1 = condstring[..index].trim().to_string();
let split2 = condstring[index + keyword.len()..].trim().to_string();
let left_expr = AST::supersplitter(split1.clone(), lst); let right_expr = AST::supersplitter(split2.clone(), lst); let binary_expr = BinaryExpression {
op: bin_keyword,
left: Box::new(left_expr.clone()),
right: Box::new(right_expr.clone()),
condstring: condstring.clone(),
};
lst.push(Expression::Binary(binary_expr.clone())); return Expression::Binary(binary_expr);
} else if let Some(mem_keyword) = match keyword.as_str() {
"in" => Some(Memberships::IN),
"not in" => Some(Memberships::NOT_IN),
_ => None,
} {
let split1 = condstring[..index].trim().to_string();
let split2 = condstring[index + keyword.len()..].trim().to_string();
let left_expr = AST::supersplitter(split1.clone(), lst);
let right_expr = AST::supersplitter(split2.clone(), lst);
let membership_expr = MembershipExpression {
op: mem_keyword,
left: Box::new(left_expr.clone()),
right: Box::new(right_expr.clone()),
condstring: condstring.clone(),
};
lst.push(Expression::Membership(membership_expr.clone())); return Expression::Membership(membership_expr);
}
Expression::Literal(condstring) }
/// Splits a condition string at its top-level operator and returns the resulting
/// expression tree; every non-literal node created on the way is pushed to `lst`.
///
/// The input is trimmed and, when the whole text is wrapped in one pair of
/// parentheses (see `AST::is_expression_a_parenthesis`), that pair is removed.
/// The text is then scanned left to right, looking only at positions that are
/// outside double-quoted strings and where parentheses are balanced:
/// * the first operator with priority flag `1` (logical operators) splits the
///   expression immediately;
/// * otherwise the first operator with priority flag `0` is used;
/// * with no operator at all the text becomes an `Expression::Literal`.
///
/// Positions are tracked as byte offsets (`char_indices`), which is what
/// `AST::keyword_identifier` and `AST::recursive_function` index and slice with.
/// Counting characters instead mis-split or panicked on non-ASCII text.
pub fn supersplitter(condstring_in: String, lst: &mut Vec<Expression>) -> Expression {
    let mut open_parens = 0;
    let mut closed_parens = 0;
    let mut string_flag = 0;

    let mut condstring = condstring_in.trim().to_string();
    if AST::is_expression_a_parenthesis(&condstring) {
        condstring = condstring[1..condstring.len() - 1].to_string();
    }

    let mut logical_split: Option<(&'static str, usize)> = None;
    let mut first_relational: Option<(&'static str, usize)> = None;
    for (index, ch) in condstring.char_indices() {
        let (new_string_flag, new_open, new_closed) =
            AST::flag_setter(ch, string_flag, open_parens, closed_parens);
        string_flag = new_string_flag;
        open_parens = new_open;
        closed_parens = new_closed;
        if !AST::flags_check(open_parens, closed_parens, string_flag) {
            continue;
        }
        match AST::keyword_identifier(&condstring, index) {
            (keyword, 1) => {
                logical_split = Some((keyword, index));
                break;
            }
            (keyword, 0) if first_relational.is_none() => {
                first_relational = Some((keyword, index));
            }
            _ => {}
        }
    }

    // "No_keyword" makes `recursive_function` return the text as a literal.
    let (keyword, index) = logical_split
        .or(first_relational)
        .unwrap_or(("No_keyword", 0));
    AST::recursive_function(keyword, condstring, index, lst)
}
/// Advances the scanner state by one character and returns
/// `(string_flag, open_parenthesis_count, closed_parenthesis_count)`.
///
/// * `"` toggles `string_flag` between `0` and `1` (other flag values are left alone);
/// * `(` and `)` increment their counter only when the incoming `string_flag` is `0`;
/// * any other character leaves all three values unchanged.
pub fn flag_setter( char: char, string_flag: i32, number_of_open_parenthesis: i32, number_of_closed_parenthesis: i32) -> (i32, i32, i32) {
    let outside_string = string_flag == 0;
    match char {
        '"' if outside_string => (1, number_of_open_parenthesis, number_of_closed_parenthesis),
        '"' if string_flag == 1 => (0, number_of_open_parenthesis, number_of_closed_parenthesis),
        '(' if outside_string => (string_flag, number_of_open_parenthesis + 1, number_of_closed_parenthesis),
        ')' if outside_string => (string_flag, number_of_open_parenthesis, number_of_closed_parenthesis + 1),
        _ => (string_flag, number_of_open_parenthesis, number_of_closed_parenthesis),
    }
}
/// Returns `true` when the scanner is at top level: as many parentheses have been
/// closed as opened and `string_flag` is `0`.
pub fn flags_check(number_of_open_parenthesis: i32, number_of_closed_parenthesis: i32, string_flag: i32) -> bool {
    let depth = number_of_open_parenthesis - number_of_closed_parenthesis;
    depth == 0 && string_flag == 0
}
/// Checks that the parentheses between the first and the last byte of
/// `expression` never close more than they open.
///
/// A `(` at offset 0 and a `)` at the last byte are ignored; the function returns
/// `false` as soon as the remaining `)` outnumber the remaining `(`. For a string
/// that starts with `(` and ends with `)` this means the first parenthesis is only
/// closed by the last one, e.g. `"(a and (b))"` but not `"(a) and (b)"`.
///
/// Positions are compared as byte offsets. Comparing a character count with the
/// byte length made any text containing a multi-byte character (such as
/// `"(é > 0)"`) fail the check.
pub fn is_parenthesis_exterior(expression: &str) -> bool {
    let last_offset = expression.len().saturating_sub(1);
    let mut inner_open = 0usize;
    let mut inner_closed = 0usize;
    for (offset, ch) in expression.char_indices() {
        match ch {
            '(' if offset != 0 => inner_open += 1,
            ')' if offset != last_offset => {
                inner_closed += 1;
                if inner_closed > inner_open {
                    return false;
                }
            }
            _ => {}
        }
    }
    true
}
/// Returns `true` when `expression` is at least two bytes long, starts with `(`,
/// ends with `)`, and passes `AST::is_parenthesis_exterior`.
pub fn is_expression_a_parenthesis(expression: &str) -> bool {
    let wrapped = expression.len() >= 2
        && expression.starts_with('(')
        && expression.ends_with(')');
    wrapped && AST::is_parenthesis_exterior(expression)
}
pub fn clean_code(raw_code: &str) -> String {
let space_to_tab_ratio = 4;
let ada_code_content = raw_code.replace("\t", &" ".repeat(space_to_tab_ratio));
let cleaned_code = RE_CLEAN_CODE.replace_all(&ada_code_content, |caps: ®ex::Captures| {
if let Some(string_match) = caps.name("string") {
let full_str = string_match.as_str();
if full_str.len() >= 2 {
format!("\"{}\"", " ".repeat(full_str.len() - 2))
} else {
" ".repeat(full_str.len())
}
} else if let Some(comment) = caps.name("comment") {
" ".repeat(comment.as_str().len())
}
else {
caps.get(0).unwrap().as_str().to_string()
}
});
cleaned_code.into_owned()
}
/// Runs every node extractor over `code_text` and concatenates their results.
///
/// Nodes appear in extractor order: packages, procedures/functions, type
/// declarations, declare blocks, control-flow nodes, statement nodes, variable
/// declarations, tasks, entries. The first extractor error is returned as is.
pub fn extract_all_nodes(code_text: &str) -> Result<Vec<NodeData>,ASTError> {
    let mut collected: Vec<NodeData> = Vec::new();

    // Declarative units.
    collected.extend(AST::extract_packages(code_text)?);
    collected.extend(AST::extract_procedures_functions(code_text)?);
    collected.extend(AST::extract_type_declarations(code_text)?);
    collected.extend(AST::extract_declare_blocks(code_text)?);

    // Control flow and statements.
    collected.extend(AST::extract_control_flow_nodes(code_text)?);
    collected.extend(AST::extract_statement_nodes(code_text)?);

    // Object declarations and tasking constructs.
    collected.extend(AST::extract_variable_declarations(code_text)?);
    collected.extend(AST::extract_tasks(code_text)?);
    collected.extend(AST::extract_entries(code_text)?);

    Ok(collected)
}
/// Prints the expression tree rooted at `nodo` to standard output.
///
/// Each node is printed on its own line, indented by four spaces per `level` and
/// tagged with `prefix`. Children are printed one level deeper with the prefixes
/// `L--- ` / `R--- ` (left / right operand), `U--- ` (unary operand) and `A--- `
/// (root of a nested condition expression).
pub fn leggitree(nodo: &Expression, level: u32, prefix: &str) {
    let indent = " ".repeat((level * 4) as usize);
    match nodo {
        Expression::Binary(binary) => {
            println!("{} {} {}", indent, prefix, binary.condstring);
            AST::leggitree(&binary.left, level + 1, "L--- ");
            AST::leggitree(&binary.right, level + 1, "R--- ");
        }
        Expression::Membership(membership) => {
            println!("{} {} {}", indent, prefix, membership.condstring);
            AST::leggitree(&membership.left, level + 1, "L--- ");
            AST::leggitree(&membership.right, level + 1, "R--- ");
        }
        Expression::Unary(unary) => {
            println!("{} {} {}", indent, prefix, unary.condstring);
            AST::leggitree(&unary.operand, level + 1, "U--- ");
        }
        Expression::Literal(text) => {
            println!("{} {} Literal: {}", indent, prefix, text);
        }
        Expression::Condition(condition) => {
            println!("{} {} Condition Expression (albero):", indent, prefix);
            match &condition.albero {
                Some(root) => AST::leggitree(root, level + 1, "A--- "),
                None => println!("{} No albero in ConditionExpr", " ".repeat(((level + 1) * 4) as usize)),
            }
        }
    }
}
/// Returns the id of the first node, in `descendants` order from the root, whose
/// `name` and `node_type` both equal the given values, or `None` if there is none.
pub fn find_node_by_name_and_type(&self, name: &str, node_type: &str) -> Option<NodeId> {
    self.root_id.descendants(&self.arena).find(|&candidate| {
        self.arena.get(candidate).map_or(false, |entry| {
            let data = entry.get();
            data.name == name && data.node_type == node_type
        })
    })
}
/// Builds one `ElsifStatement` node for every `RE_ELSIF` match in `code_text`.
///
/// The start line (1-based) and start index (byte offset) come from the
/// `elsifstat` group and `conditions` are parsed from the `Condition` group.
/// `body_start` and `end_index` are both the byte offset just past the whole
/// match, and `end_line` is the line containing that offset.
///
/// # Errors
/// * `ASTError::RegexError` when the regex engine fails on a match attempt.
/// * `ASTError::InvalidCapture` when `elsifstat` or `Condition` is absent
///   (previously a missing `Condition` group panicked).
/// * `ASTError::MatchItemMissing` when the whole match is unavailable.
pub fn extract_elsif_statements(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    let mut nodes = Vec::new();
    for mat in RE_ELSIF.captures_iter(code_text) {
        let captures = mat.map_err(|_| ASTError::RegexError)?;
        let elsif_keyword = captures.name("elsifstat").ok_or(ASTError::InvalidCapture)?;
        let full_match = captures.get(0).ok_or(ASTError::MatchItemMissing)?;
        let start_index = elsif_keyword.start();
        let start_line = code_text[..start_index].matches('\n').count() + 1;
        let condition = captures.name("Condition").ok_or(ASTError::InvalidCapture)?;
        let match_end = full_match.end();
        let mut node = NodeData::new(
            "ElsifStatement".to_string(),
            "ElsifStatement".to_string(),
            Some(start_line),
            Some(start_index),
            false,
        );
        node.conditions = Some(AST::parse_condition_expression(condition.as_str()));
        node.body_start = Some(match_end);
        node.end_line = Some(code_text[..match_end].matches('\n').count() + 1);
        node.end_index = Some(match_end);
        nodes.push(node);
    }
    Ok(nodes)
}
/// Builds one `ElseStatement` node for every `RE_ELSE` match in `code_text`.
///
/// The start line (1-based) and start index (byte offset) come from the
/// `elsestat` group. `body_start` and `end_index` are both the byte offset just
/// past the whole match, and `end_line` equals the start line.
///
/// # Errors
/// * `ASTError::RegexError` when the regex engine fails on a match attempt.
/// * `ASTError::InvalidCapture` when the `elsestat` group is absent.
/// * `ASTError::MatchItemMissing` when the whole match is unavailable.
pub fn extract_else_statements(code_text: &str) -> Result<Vec<NodeData>, ASTError> {
    let mut found = Vec::new();
    for attempt in RE_ELSE.captures_iter(code_text) {
        let caps = attempt.map_err(|_| ASTError::RegexError)?;
        let keyword = caps.name("elsestat").ok_or(ASTError::InvalidCapture)?;
        let match_end = caps.get(0).ok_or(ASTError::MatchItemMissing)?.end();
        let keyword_offset = keyword.start();
        let keyword_line = code_text[..keyword_offset].matches('\n').count() + 1;

        let mut else_node = NodeData::new(
            "ElseStatement".to_string(),
            "ElseStatement".to_string(),
            Some(keyword_line),
            Some(keyword_offset),
            false,
        );
        else_node.body_start = Some(match_end);
        else_node.end_line = Some(keyword_line);
        else_node.end_index = Some(match_end);
        found.push(else_node);
    }
    Ok(found)
}
}