use crate::parsing::method_call::MethodCall;
use crate::types::SymbolCounter;
use crate::{FileId, Range, Symbol};
use std::any::Any;
use std::collections::HashSet;
use tree_sitter::Node;
/// Common interface implemented by every language-specific parser.
///
/// Implementors must be `Send + Sync`. Most relationship finders return
/// `(&str, &str, Range)` triples whose string slices borrow from the `code`
/// argument, so the results cannot outlive the source text they came from.
pub trait LanguageParser: Send + Sync {
    /// Parses `code` belonging to `file_id` and returns the symbols found.
    ///
    /// `symbol_counter` is passed mutably; presumably it hands out ids for the
    /// new symbols (confirm against implementors).
    fn parse(
        &mut self,
        code: &str,
        file_id: FileId,
        symbol_counter: &mut SymbolCounter,
    ) -> Vec<Symbol>;

    /// Exposes the parser as `&dyn Any`, which allows downcasting to the
    /// concrete parser type.
    fn as_any(&self) -> &dyn Any;

    /// Returns the documentation comment for `node` within `code`, or `None`
    /// when there is none.
    fn extract_doc_comment(&self, node: &Node, code: &str) -> Option<String>;

    /// Finds calls in `code`, returned as `(caller, target, range)` triples.
    fn find_calls<'a>(&mut self, code: &'a str) -> Vec<(&'a str, &'a str, Range)>;

    /// Finds method calls in `code`.
    ///
    /// The default implementation wraps every `(caller, target, range)` triple
    /// produced by [`find_calls`](Self::find_calls) in a [`MethodCall`].
    fn find_method_calls(&mut self, code: &str) -> Vec<MethodCall> {
        let plain_calls = self.find_calls(code);
        let mut method_calls = Vec::with_capacity(plain_calls.len());
        for (caller, target, range) in plain_calls {
            method_calls.push(MethodCall::new(caller, target, range));
        }
        method_calls
    }

    /// Finds implementation relationships in `code`.
    ///
    /// NOTE(review): the meaning of each tuple position is not visible here —
    /// confirm against implementors.
    fn find_implementations<'a>(&mut self, code: &'a str) -> Vec<(&'a str, &'a str, Range)>;

    /// Finds extension (inheritance) relationships in `code`.
    ///
    /// The default implementation reports none.
    fn find_extends<'a>(&mut self, _code: &'a str) -> Vec<(&'a str, &'a str, Range)> {
        Vec::new()
    }

    /// Finds "uses" relationships in `code`.
    fn find_uses<'a>(&mut self, code: &'a str) -> Vec<(&'a str, &'a str, Range)>;

    /// Finds "defines" relationships in `code`.
    fn find_defines<'a>(&mut self, code: &'a str) -> Vec<(&'a str, &'a str, Range)>;

    /// Finds the imports declared in `code`, which belongs to `file_id`.
    fn find_imports(&mut self, code: &str, file_id: FileId) -> Vec<crate::parsing::Import>;

    /// Returns the language this parser handles.
    fn language(&self) -> crate::parsing::Language;

    /// Finds variable/type pairings in `code`.
    ///
    /// The default implementation reports none.
    fn find_variable_types<'a>(&mut self, _code: &'a str) -> Vec<(&'a str, &'a str, Range)> {
        Vec::new()
    }

    /// Owned-string variant of [`find_variable_types`](Self::find_variable_types).
    ///
    /// The default implementation returns `None`. NOTE(review): presumably
    /// `None` means "not supported, fall back to `find_variable_types`" —
    /// confirm against callers.
    fn find_variable_types_with_substitution(
        &mut self,
        _code: &str,
    ) -> Option<Vec<(String, String, Range)>> {
        None
    }

    /// Finds inherent methods in `code`, returned with owned strings.
    ///
    /// The default implementation reports none.
    fn find_inherent_methods(&mut self, _code: &str) -> Vec<(String, String, Range)> {
        Vec::new()
    }
}
/// Factory that produces boxed [`LanguageParser`] instances.
///
/// Implementors must be `Send + Sync`.
pub trait ParserFactory: Send + Sync {
/// Builds a new parser, or returns a `String` describing why it could not be created.
fn create(&self) -> Result<Box<dyn LanguageParser>, String>;
}
/// A node kind recorded as handled, identified by its name and a numeric id.
///
/// Implements `Eq` and `Hash` so values can be stored in a `HashSet`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct HandledNode {
/// Node kind name.
pub name: String,
/// Numeric id paired with the name (presumably the tree-sitter kind id — confirm against callers).
pub id: u16,
}
/// Records which node kinds have been handled.
pub trait NodeTracker {
/// Returns the set of node kinds registered so far.
fn get_handled_nodes(&self) -> &HashSet<HandledNode>;
/// Registers the node kind `node_kind` with id `node_id` as handled.
fn register_handled_node(&mut self, node_kind: &str, node_id: u16);
}
/// Set-backed storage for handled node kinds; the `Default` value is empty.
#[derive(Debug, Default)]
pub struct NodeTrackingState {
// Distinct (name, id) pairs registered so far.
handled_nodes: HashSet<HandledNode>,
}
impl NodeTrackingState {
pub fn new() -> Self {
Self {
handled_nodes: HashSet::new(),
}
}
}
impl NodeTracker for NodeTrackingState {
    /// Returns the set of node kinds registered so far.
    fn get_handled_nodes(&self) -> &HashSet<HandledNode> {
        &self.handled_nodes
    }

    /// Records `node_kind` / `node_id` as handled.
    ///
    /// The pair is stored in a set, so registering the same pair again leaves
    /// the set unchanged.
    #[inline]
    fn register_handled_node(&mut self, node_kind: &str, node_id: u16) {
        self.handled_nodes.insert(HandledNode {
            name: node_kind.to_owned(),
            id: node_id,
        });
    }
}
/// Returns the longest prefix of `s` that is at most `max_bytes` bytes long
/// and ends on a UTF-8 character boundary.
///
/// `s` is returned unchanged when it already fits. When `max_bytes` falls
/// inside a multi-byte character, the cut moves back to the start of that
/// character, so the result may be shorter than `max_bytes`. Never panics.
#[inline]
pub fn safe_truncate_str(s: &str, max_bytes: usize) -> &str {
    if max_bytes >= s.len() {
        return s;
    }
    // Scan backwards from the requested cut for the nearest boundary.
    // Index 0 is always a boundary, so the fallback is never taken.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
/// Depth limit enforced by `check_recursion_depth`: a depth greater than this is rejected.
pub const MAX_AST_DEPTH: usize = 500;
/// Reports whether traversal may continue at `depth`.
///
/// Returns `true` while `depth` does not exceed [`MAX_AST_DEPTH`]. Otherwise
/// it logs a warning carrying the 1-based start line and column of `node`
/// and returns `false`.
#[inline]
pub fn check_recursion_depth(depth: usize, node: Node) -> bool {
    if depth <= MAX_AST_DEPTH {
        return true;
    }
    // Rows and columns are reported 1-based, hence the `+ 1`.
    let position = node.start_position();
    tracing::warn!(
        "[parser] maximum AST depth ({MAX_AST_DEPTH}) exceeded at line {}:{}. Skipping subtree to prevent stack overflow.",
        position.row + 1,
        position.column + 1
    );
    false
}
/// Returns the slice of `code` that ends at `end_byte` and spans at most
/// `window_size` bytes, with both ends adjusted to UTF-8 character boundaries.
///
/// * `end_byte` is clamped to `code.len()`. If it falls inside a multi-byte
///   character, the end moves back to the start of that character.
/// * If the window's start falls inside a multi-byte character, the start
///   moves forward to the next boundary, so the window only ever shrinks.
///
/// Never panics; the result is empty when no whole character fits.
pub fn safe_substring_window(code: &str, end_byte: usize, window_size: usize) -> &str {
    // Slicing at a non-boundary index panics, so round the end down first.
    // Index 0 is always a boundary, which guarantees termination.
    let mut end = end_byte.min(code.len());
    while !code.is_char_boundary(end) {
        end -= 1;
    }

    let start_raw = end.saturating_sub(window_size);
    // `end` is a boundary, so the search always succeeds; it returns
    // `start_raw` itself whenever that index is already valid.
    let start = (start_raw..=end)
        .find(|&i| code.is_char_boundary(i))
        .unwrap_or(end);

    &code[start..end]
}
/// Truncates `s` to at most `max_bytes` bytes on a character boundary and
/// appends `"..."` when anything was cut off.
///
/// The ellipsis is added on top of the limit, so the returned string can be
/// up to `max_bytes + 3` bytes long. Strings that already fit are returned
/// as an owned copy without an ellipsis.
#[inline]
pub fn truncate_for_display(s: &str, max_bytes: usize) -> String {
    match safe_truncate_str(s, max_bytes) {
        head if head.len() < s.len() => format!("{head}..."),
        whole => whole.to_owned(),
    }
}
#[cfg(test)]
mod tests {
    //! Tests for the UTF-8-safe string helpers.
    //!
    //! Every non-ASCII fixture is written with `\u{...}` escapes so the byte
    //! layout the tests rely on survives any re-encoding of this file.

    use super::*;

    /// Cutting inside a 4-byte emoji must back up to the emoji's first byte.
    #[test]
    fn test_safe_truncate_with_emoji_panic() {
        // U+1F50D occupies bytes 8..12, so byte 10 is in the middle of it.
        let text = "Status: \u{1F50D} Active";
        assert!(!text.is_char_boundary(10), "fixture must cut mid-character");

        let result = safe_truncate_str(text, 10);
        assert_eq!(result, "Status: ");
        assert!(result.len() <= 10);
    }

    /// A cut that already sits on a boundary is honoured exactly.
    #[test]
    fn test_safe_truncate_exact_boundary() {
        let text = "Hello, World!";
        let result = safe_truncate_str(text, 7);
        assert_eq!(result, "Hello, ");
    }

    /// Two- and three-byte characters are never split.
    #[test]
    fn test_safe_truncate_multi_byte_chars() {
        // U+00E9 starts at byte 3 and is 2 bytes long.
        let text = "Caf\u{E9} is nice";
        assert!(!text.is_char_boundary(4), "fixture must cut mid-character");
        assert_eq!(safe_truncate_str(text, 4), "Caf");

        // U+251C starts at byte 4 and is 3 bytes long.
        let text = "Tree\u{251C}\u{2500}\u{2500}branch";
        assert!(!text.is_char_boundary(5), "fixture must cut mid-character");
        assert_eq!(safe_truncate_str(text, 5), "Tree");
    }

    /// The ellipsis is appended only when something was actually removed.
    #[test]
    fn test_truncate_for_display() {
        let text = "This is a very long string that needs truncation";
        let result = truncate_for_display(text, 10);
        assert_eq!(result, "This is a ...");

        let short_text = "Short";
        let result = truncate_for_display(short_text, 10);
        assert_eq!(result, "Short");
    }

    /// Regression test for issue #29: truncating source text that contains
    /// emoji and box-drawing characters at byte 100 must not panic.
    #[test]
    fn test_issue_29_exact_case() {
        // The 12-space indentation is intended to put the box-drawing
        // character across byte 100, as in the original report. U+1F50D
        // stands in for the reported emoji; any 4-byte character exercises
        // the same path.
        let text = concat!(
            "[\n",
            "            f\"\u{1F50D} System Status: {health.status.title()} {health.status_emoji}\",\n",
            "            f\"\u{251C}\u{2500}\u{2500} Active Processes: {health.process_count}/{self.config.critical_threshold} \"",
        );
        assert!(text.len() > 100, "fixture must be long enough to truncate");

        let result = safe_truncate_str(text, 100);
        assert!(result.len() <= 100);
        assert!(text.starts_with(result));

        // The "..." suffix adds at most three bytes on top of the limit.
        let display = truncate_for_display(text, 100);
        assert!(display.len() <= 103);

        // Independent of exact offsets: cutting one byte into the 3-byte
        // box-drawing character must fall back to the start of that character.
        let branch = text
            .find('\u{251C}')
            .expect("fixture contains a box-drawing character");
        assert_eq!(safe_truncate_str(text, branch + 1).len(), branch);
    }

    /// The window ends at `end_byte` and never starts inside a character.
    #[test]
    fn test_safe_substring_window() {
        assert_eq!(safe_substring_window("hello world", 11, 5), "world");
        // An end past the string is clamped to its length.
        assert_eq!(safe_substring_window("abc", 100, 2), "bc");
        // Raw start (byte 2) is inside U+00E9, so the window starts at byte 3.
        assert_eq!(safe_substring_window("h\u{E9}llo", 6, 4), "llo");
    }
}