#![cfg_attr(coverage_nightly, coverage(off))]
use crate::models::unified_ast::{
AstDag, AstKind, FunctionKind, Language, TypeKind, UnifiedAstNode,
};
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::path::Path;
use std::time::{Duration, Instant};
use tree_sitter::{Node, Parser};
/// Maximum AST traversal depth before parsing aborts with an error.
const MAX_RECURSION_DEPTH: usize = 1000;
/// Wall-clock budget for a single `parse_file` call.
const MAX_PARSING_TIME: Duration = Duration::from_secs(30);
/// Largest accepted input, in bytes (1 MiB). This also guarantees that byte
/// offsets fit in the `u32` source ranges stored on AST nodes.
const MAX_STRING_LENGTH: usize = 1024 * 1024;
/// Upper bound on the number of nodes added to the DAG in one parse.
const MAX_NODES: usize = 100_000;
/// Parser that turns Kotlin source text into an [`AstDag`].
///
/// Wraps a tree-sitter `Parser` pre-configured with the Kotlin grammar and
/// enforces recursion-depth and wall-clock limits during traversal.
pub struct KotlinAstParser {
// Underlying tree-sitter parser, already set to the Kotlin language.
parser: Parser,
// Maximum traversal depth allowed before a parse errors out.
max_depth: usize,
// Wall-clock budget for a single `parse_file` call.
timeout: Duration,
}
impl Default for KotlinAstParser {
/// Equivalent to [`KotlinAstParser::new`]: default depth and timeout limits.
fn default() -> Self {
Self::new()
}
}
impl KotlinAstParser {
#[must_use]
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn new() -> Self {
Self::with_limits(MAX_RECURSION_DEPTH, MAX_PARSING_TIME)
}
#[must_use]
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn with_limits(max_depth: usize, timeout: Duration) -> Self {
let mut parser = Parser::new();
parser
.set_language(&tree_sitter_kotlin_ng::LANGUAGE.into())
.expect("Failed to set Kotlin language");
Self {
parser,
max_depth,
timeout,
}
}
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub fn parse_file(&mut self, path: &Path, content: &str) -> Result<AstDag> {
if content.len() > MAX_STRING_LENGTH {
return Err(anyhow::anyhow!(
"File too large: {} bytes exceeds limit of {} bytes",
content.len(),
MAX_STRING_LENGTH
));
}
let mut dag = AstDag::new();
let start_time = Instant::now();
let tree = self
.parser
.parse(content, None)
.context("Failed to parse Kotlin file")?;
let root = tree.root_node();
let mut ctx = ParseContext {
content,
dag: &mut dag,
path: path.to_string_lossy().into_owned(),
stack: Vec::with_capacity(self.max_depth),
node_map: HashMap::with_capacity(1000),
start_time,
max_depth: self.max_depth,
timeout: self.timeout,
current_depth: 0,
nodes_created: 0,
};
if let Err(e) = self.visit_node(&mut ctx, root) {
return Err(anyhow::anyhow!("Error during AST traversal: {e}"));
}
Ok(dag)
}
fn visit_node(&self, ctx: &mut ParseContext, node: Node) -> Result<Option<usize>> {
if ctx.current_depth >= ctx.max_depth {
return Err(anyhow::anyhow!(
"Maximum recursion depth exceeded: {} at depth {}",
ctx.max_depth,
ctx.current_depth
));
}
if ctx.start_time.elapsed() > ctx.timeout {
return Err(anyhow::anyhow!(
"Parsing timeout exceeded: {:?} at depth {}",
ctx.timeout,
ctx.current_depth
));
}
if ctx.nodes_created >= MAX_NODES {
return Err(anyhow::anyhow!(
"Maximum nodes limit exceeded: {} at depth {}",
MAX_NODES,
ctx.current_depth
));
}
ctx.current_depth += 1;
let node_id = match node.kind() {
"class_declaration" => self.process_class(ctx, node)?,
"object_declaration" => self.process_object(ctx, node)?,
"function_declaration" => self.process_function(ctx, node)?,
"enum_class_declaration" => self.process_enum(ctx, node)?,
_ => None,
};
if node.kind() == "class_body" {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.kind() == "function_declaration" {
self.process_function(ctx, child)?;
}
}
}
let child_result: Result<()> = {
let mut work_stack = Vec::new();
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
work_stack.push((child, ctx.current_depth + 1));
}
while let Some((work_node, depth)) = work_stack.pop() {
if ctx.nodes_created >= MAX_NODES {
break;
}
if depth >= ctx.max_depth {
continue;
}
if ctx.start_time.elapsed() > ctx.timeout {
break;
}
let kind = work_node.kind();
if kind == "class_declaration"
|| kind == "object_declaration"
|| kind == "function_declaration"
|| kind == "enum_class_declaration"
{
let old_depth = ctx.current_depth;
ctx.current_depth = depth;
let _ = self.process_node_simple(ctx, work_node);
ctx.current_depth = old_depth;
} else if kind == "class_body" {
let mut body_cursor = work_node.walk();
for body_child in work_node.children(&mut body_cursor) {
if body_child.kind() == "function_declaration" {
work_stack.push((body_child, depth + 1));
}
}
} else {
let mut child_cursor = work_node.walk();
for child in work_node.children(&mut child_cursor) {
work_stack.push((child, depth + 1));
}
}
}
Ok(())
};
ctx.current_depth -= 1;
child_result?;
Ok(node_id)
}
fn process_class(&self, ctx: &mut ParseContext, node: Node) -> Result<Option<usize>> {
let source_start = node.start_byte();
let source_end = (source_start + 20)
.min(node.end_byte())
.min(ctx.content.len());
let source_prefix = ctx
.content
.get(source_start..source_end)
.unwrap_or_default();
let is_enum = source_prefix.starts_with("enum ");
let name = if is_enum {
self.extract_enum_name(ctx, node)
.unwrap_or_else(|| String::from("AnonymousEnum"))
} else {
self.extract_identifier(ctx, node, "simple_identifier")
.unwrap_or_else(|| String::from("AnonymousClass"))
};
let kind = if is_enum {
AstKind::Type(TypeKind::Enum)
} else {
AstKind::Type(TypeKind::Class)
};
let mut ast_node = UnifiedAstNode::new(kind, Language::Kotlin);
ast_node.source_range = node.start_byte() as u32..node.end_byte() as u32;
self.set_name_vector(&mut ast_node, &name);
let node_id = ctx.dag.add_node(ast_node);
ctx.nodes_created += 1;
if !is_enum {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.kind() == "class_body" {
let mut body_cursor = child.walk();
for body_child in child.children(&mut body_cursor) {
if body_child.kind() == "function_declaration" {
self.process_function(ctx, body_child)?;
}
}
}
}
}
Ok(Some(node_id as usize))
}
fn process_object(&self, ctx: &mut ParseContext, node: Node) -> Result<Option<usize>> {
let name = self
.extract_identifier(ctx, node, "simple_identifier")
.unwrap_or_else(|| String::from("AnonymousObject"));
let mut ast_node = UnifiedAstNode::new(AstKind::Type(TypeKind::Class), Language::Kotlin);
ast_node.source_range = node.start_byte() as u32..node.end_byte() as u32;
self.set_name_vector(&mut ast_node, &name);
let node_id = ctx.dag.add_node(ast_node);
ctx.nodes_created += 1;
Ok(Some(node_id as usize))
}
fn process_enum(&self, ctx: &mut ParseContext, node: Node) -> Result<Option<usize>> {
let name = self
.extract_identifier(ctx, node, "simple_identifier")
.unwrap_or_else(|| String::from("AnonymousEnum"));
let mut ast_node = UnifiedAstNode::new(AstKind::Type(TypeKind::Enum), Language::Kotlin);
ast_node.source_range = node.start_byte() as u32..node.end_byte() as u32;
self.set_name_vector(&mut ast_node, &name);
let node_id = ctx.dag.add_node(ast_node);
ctx.nodes_created += 1;
Ok(Some(node_id as usize))
}
fn process_function(&self, ctx: &mut ParseContext, node: Node) -> Result<Option<usize>> {
let name = self
.extract_identifier(ctx, node, "simple_identifier")
.unwrap_or_else(|| String::from("anonymousFunction"));
let mut ast_node =
UnifiedAstNode::new(AstKind::Function(FunctionKind::Regular), Language::Kotlin);
ast_node.source_range = node.start_byte() as u32..node.end_byte() as u32;
self.set_name_vector(&mut ast_node, &name);
let node_id = ctx.dag.add_node(ast_node);
ctx.nodes_created += 1;
Ok(Some(node_id as usize))
}
fn process_node_simple(&self, ctx: &mut ParseContext, node: Node) -> Result<Option<usize>> {
let kind = node.kind();
if kind == "class_declaration" {
self.process_class(ctx, node)
} else if kind == "object_declaration" {
self.process_object(ctx, node)
} else if kind == "function_declaration" {
self.process_function(ctx, node)
} else if kind == "enum_class_declaration" {
self.process_enum(ctx, node)
} else {
Ok(None)
}
}
fn set_name_vector(&self, _node: &mut UnifiedAstNode, _name: &str) {
}
fn extract_enum_name(&self, ctx: &mut ParseContext, node: Node) -> Option<String> {
let source_text = ctx
.content
.get(node.start_byte()..node.end_byte())
.unwrap_or_default();
if let Some(first_line) = source_text.lines().next() {
let words: Vec<&str> = first_line.split_whitespace().collect();
if words.len() >= 3 && words[0] == "enum" && words[1] == "class" {
let name = words[2].trim_end_matches('{').trim();
return Some(name.to_string());
}
}
None
}
fn extract_identifier(
&self,
ctx: &mut ParseContext,
node: Node,
identifier_kind: &str,
) -> Option<String> {
debug_assert!(
!identifier_kind.is_empty(),
"identifier_kind must not be empty"
);
let source_start = node.start_byte();
let source_end = (source_start + 20)
.min(node.end_byte())
.min(ctx.content.len());
let source_prefix = ctx
.content
.get(source_start..source_end)
.unwrap_or_default();
let is_enum = source_prefix.starts_with("enum ");
let mut found_identifiers = Vec::new();
if is_enum {
return None;
}
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.kind() == identifier_kind {
let start = child.start_byte();
let end = child.end_byte();
if start < ctx.content.len() && end <= ctx.content.len() && start < end {
let text = ctx.content.get(start..end).unwrap_or_default().to_string();
found_identifiers.push(text);
}
}
}
if is_enum && found_identifiers.len() > 1 {
found_identifiers
.into_iter()
.rev()
.find(|s| s != "class" && s != "enum")
} else {
found_identifiers.into_iter().next()
}
}
}
/// Mutable state threaded through a single `parse_file` traversal.
struct ParseContext<'a> {
// Full source text of the file being parsed.
content: &'a str,
// Output DAG that receives the extracted nodes.
dag: &'a mut AstDag,
// Display path of the file. NOTE(review): written in `parse_file` but never
// read in this file — confirm it has an external consumer before removing.
path: String,
// NOTE(review): allocated but never used in this file; presumably reserved
// for ancestor tracking — verify.
stack: Vec<usize>,
// NOTE(review): allocated but never used in this file; presumably reserved
// for a tree-sitter-node -> dag-node mapping — verify.
node_map: HashMap<usize, usize>,
// Instant parsing began; compared against `timeout` during traversal.
start_time: Instant,
// Maximum traversal depth before the parse errors out.
max_depth: usize,
// Wall-clock budget for the traversal.
timeout: Duration,
// Depth of the node currently being visited.
current_depth: usize,
// Number of nodes added to the DAG so far (bounded by MAX_NODES).
nodes_created: usize,
}
#[cfg_attr(coverage_nightly, coverage(off))]
#[cfg(test)]
mod property_tests {
//! Smoke-level property tests.
//!
//! NOTE(review): both properties are tautologies (`true`, and `x < 1001`
//! for x drawn from 0..1000). They only verify that the proptest harness
//! is wired up and do not exercise `KotlinAstParser` at all — consider
//! replacing them with real parser properties (e.g. parse never panics on
//! arbitrary input).
use proptest::prelude::*;
proptest! {
// Always passes; placeholder for a genuine stability property.
#[test]
fn basic_property_stability(_input in ".*") {
prop_assert!(true);
}
// Always passes by construction of the input range.
#[test]
fn module_consistency_check(_x in 0u32..1000) {
prop_assert!(_x < 1001);
}
}
}