use crate::ingest::detect::Language;
use anyhow::Result;
use std::cell::RefCell;
// Per-thread parser slots, one per supported language.
//
// Every slot starts out as `None`; the matching `with_*_parser` /
// `with_*_parser_opt` accessor below fills it in the first time that language
// is requested on the current thread. The `RefCell` provides the interior
// mutability needed to populate and hand out the parser from a thread-local.
thread_local! {
// Slot for the parser configured with the Rust grammar.
static RUST_PARSER: RefCell<Option<tree_sitter::Parser>> = const { RefCell::new(None) };
// Slot for the parser configured with the Python grammar.
static PYTHON_PARSER: RefCell<Option<tree_sitter::Parser>> = const { RefCell::new(None) };
// Slot for the parser configured with the C grammar.
static C_PARSER: RefCell<Option<tree_sitter::Parser>> = const { RefCell::new(None) };
// Slot for the parser configured with the C++ grammar.
static CPP_PARSER: RefCell<Option<tree_sitter::Parser>> = const { RefCell::new(None) };
// Slot for the parser configured with the Java grammar.
static JAVA_PARSER: RefCell<Option<tree_sitter::Parser>> = const { RefCell::new(None) };
// Slot for the parser configured with the JavaScript grammar.
static JAVASCRIPT_PARSER: RefCell<Option<tree_sitter::Parser>> = const { RefCell::new(None) };
// Slot for the parser configured with the TypeScript grammar.
static TYPESCRIPT_PARSER: RefCell<Option<tree_sitter::Parser>> = const { RefCell::new(None) };
}
/// Runs `f` with this thread's Rust parser, creating the parser on first use.
///
/// The parser is stored in the `RUST_PARSER` thread-local slot and reused by
/// later calls made on the same thread.
///
/// # Errors
///
/// Returns an error if the Rust grammar cannot be installed on a newly created
/// parser. Once the slot is populated this function no longer fails on its own.
///
/// # Panics
///
/// The slot stays mutably borrowed while `f` runs, so calling another accessor
/// for the Rust parser from inside `f` panics on the nested `borrow_mut`.
fn with_rust_parser<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut tree_sitter::Parser) -> R,
{
    RUST_PARSER.with(|parser_cell| {
        let mut slot = parser_cell.borrow_mut();
        // Get-or-create: `Option::insert` returns a reference to the value it
        // just stored, so no separate "must be Some" assertion is required.
        let parser = match slot.as_mut() {
            Some(existing) => existing,
            None => {
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_rust::language())?;
                slot.insert(fresh)
            }
        };
        Ok(f(parser))
    })
}
/// Runs `f` with this thread's Python parser, creating the parser on first use.
///
/// The parser is stored in the `PYTHON_PARSER` thread-local slot and reused by
/// later calls made on the same thread.
///
/// # Errors
///
/// Returns an error if the Python grammar cannot be installed on a newly
/// created parser.
///
/// # Panics
///
/// The slot stays mutably borrowed while `f` runs, so calling another accessor
/// for the Python parser from inside `f` panics on the nested `borrow_mut`.
fn with_python_parser<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut tree_sitter::Parser) -> R,
{
    PYTHON_PARSER.with(|parser_cell| {
        let mut slot = parser_cell.borrow_mut();
        // Get-or-create without a follow-up `expect`: the `None` arm yields the
        // reference returned by `Option::insert`.
        let parser = match slot.as_mut() {
            Some(existing) => existing,
            None => {
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_python::language())?;
                slot.insert(fresh)
            }
        };
        Ok(f(parser))
    })
}
/// Runs `f` with this thread's C parser, creating the parser on first use.
///
/// The parser is stored in the `C_PARSER` thread-local slot and reused by
/// later calls made on the same thread.
///
/// # Errors
///
/// Returns an error if the C grammar cannot be installed on a newly created
/// parser.
///
/// # Panics
///
/// The slot stays mutably borrowed while `f` runs, so calling another accessor
/// for the C parser from inside `f` panics on the nested `borrow_mut`.
fn with_c_parser<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut tree_sitter::Parser) -> R,
{
    C_PARSER.with(|parser_cell| {
        let mut slot = parser_cell.borrow_mut();
        // Get-or-create without a follow-up `expect`: the `None` arm yields the
        // reference returned by `Option::insert`.
        let parser = match slot.as_mut() {
            Some(existing) => existing,
            None => {
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_c::language())?;
                slot.insert(fresh)
            }
        };
        Ok(f(parser))
    })
}
/// Runs `f` with this thread's C++ parser, creating the parser on first use.
///
/// The parser is stored in the `CPP_PARSER` thread-local slot and reused by
/// later calls made on the same thread.
///
/// # Errors
///
/// Returns an error if the C++ grammar cannot be installed on a newly created
/// parser.
///
/// # Panics
///
/// The slot stays mutably borrowed while `f` runs, so calling another accessor
/// for the C++ parser from inside `f` panics on the nested `borrow_mut`.
fn with_cpp_parser<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut tree_sitter::Parser) -> R,
{
    CPP_PARSER.with(|parser_cell| {
        let mut slot = parser_cell.borrow_mut();
        // Get-or-create without a follow-up `expect`: the `None` arm yields the
        // reference returned by `Option::insert`.
        let parser = match slot.as_mut() {
            Some(existing) => existing,
            None => {
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_cpp::language())?;
                slot.insert(fresh)
            }
        };
        Ok(f(parser))
    })
}
/// Runs `f` with this thread's Java parser, creating the parser on first use.
///
/// The parser is stored in the `JAVA_PARSER` thread-local slot and reused by
/// later calls made on the same thread.
///
/// # Errors
///
/// Returns an error if the Java grammar cannot be installed on a newly created
/// parser.
///
/// # Panics
///
/// The slot stays mutably borrowed while `f` runs, so calling another accessor
/// for the Java parser from inside `f` panics on the nested `borrow_mut`.
fn with_java_parser<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut tree_sitter::Parser) -> R,
{
    JAVA_PARSER.with(|parser_cell| {
        let mut slot = parser_cell.borrow_mut();
        // Get-or-create without a follow-up `expect`: the `None` arm yields the
        // reference returned by `Option::insert`.
        let parser = match slot.as_mut() {
            Some(existing) => existing,
            None => {
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_java::language())?;
                slot.insert(fresh)
            }
        };
        Ok(f(parser))
    })
}
/// Runs `f` with this thread's JavaScript parser, creating the parser on first
/// use.
///
/// The parser is stored in the `JAVASCRIPT_PARSER` thread-local slot and reused
/// by later calls made on the same thread.
///
/// # Errors
///
/// Returns an error if the JavaScript grammar cannot be installed on a newly
/// created parser.
///
/// # Panics
///
/// The slot stays mutably borrowed while `f` runs, so calling another accessor
/// for the JavaScript parser from inside `f` panics on the nested `borrow_mut`.
fn with_javascript_parser<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut tree_sitter::Parser) -> R,
{
    JAVASCRIPT_PARSER.with(|parser_cell| {
        let mut slot = parser_cell.borrow_mut();
        // Get-or-create without a follow-up `expect`: the `None` arm yields the
        // reference returned by `Option::insert`.
        let parser = match slot.as_mut() {
            Some(existing) => existing,
            None => {
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_javascript::language())?;
                slot.insert(fresh)
            }
        };
        Ok(f(parser))
    })
}
/// Runs `f` with this thread's TypeScript parser, creating the parser on first
/// use.
///
/// The parser is stored in the `TYPESCRIPT_PARSER` thread-local slot and reused
/// by later calls made on the same thread. It is configured with
/// `tree_sitter_typescript::language_typescript()`.
///
/// # Errors
///
/// Returns an error if the TypeScript grammar cannot be installed on a newly
/// created parser.
///
/// # Panics
///
/// The slot stays mutably borrowed while `f` runs, so calling another accessor
/// for the TypeScript parser from inside `f` panics on the nested `borrow_mut`.
fn with_typescript_parser<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut tree_sitter::Parser) -> R,
{
    TYPESCRIPT_PARSER.with(|parser_cell| {
        let mut slot = parser_cell.borrow_mut();
        // Get-or-create without a follow-up `expect`: the `None` arm yields the
        // reference returned by `Option::insert`.
        let parser = match slot.as_mut() {
            Some(existing) => existing,
            None => {
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_typescript::language_typescript())?;
                slot.insert(fresh)
            }
        };
        Ok(f(parser))
    })
}
/// Runs `f` with mutable access to this thread's Rust parser slot.
///
/// The slot is filled first when it is empty, so `f` sees `Some(parser)` on
/// entry. `f` receives the slot itself rather than the parser, which lets it
/// replace or remove the parser as well as use it.
///
/// # Errors
///
/// Returns an error if the Rust grammar cannot be installed on a newly created
/// parser.
fn with_rust_parser_opt<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut Option<tree_sitter::Parser>) -> R,
{
    RUST_PARSER.with(|cell| {
        let mut guard = cell.borrow_mut();
        let slot: &mut Option<tree_sitter::Parser> = &mut guard;
        match slot {
            Some(_) => {}
            None => {
                // First use on this thread: build and store the parser.
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_rust::language())?;
                *slot = Some(fresh);
            }
        }
        Ok(f(slot))
    })
}
/// Runs `f` with mutable access to this thread's Python parser slot.
///
/// The slot is filled first when it is empty, so `f` sees `Some(parser)` on
/// entry. `f` receives the slot itself rather than the parser, which lets it
/// replace or remove the parser as well as use it.
///
/// # Errors
///
/// Returns an error if the Python grammar cannot be installed on a newly
/// created parser.
fn with_python_parser_opt<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut Option<tree_sitter::Parser>) -> R,
{
    PYTHON_PARSER.with(|cell| {
        let mut guard = cell.borrow_mut();
        let slot: &mut Option<tree_sitter::Parser> = &mut guard;
        match slot {
            Some(_) => {}
            None => {
                // First use on this thread: build and store the parser.
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_python::language())?;
                *slot = Some(fresh);
            }
        }
        Ok(f(slot))
    })
}
/// Runs `f` with mutable access to this thread's C parser slot.
///
/// The slot is filled first when it is empty, so `f` sees `Some(parser)` on
/// entry. `f` receives the slot itself rather than the parser, which lets it
/// replace or remove the parser as well as use it.
///
/// # Errors
///
/// Returns an error if the C grammar cannot be installed on a newly created
/// parser.
fn with_c_parser_opt<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut Option<tree_sitter::Parser>) -> R,
{
    C_PARSER.with(|cell| {
        let mut guard = cell.borrow_mut();
        let slot: &mut Option<tree_sitter::Parser> = &mut guard;
        match slot {
            Some(_) => {}
            None => {
                // First use on this thread: build and store the parser.
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_c::language())?;
                *slot = Some(fresh);
            }
        }
        Ok(f(slot))
    })
}
/// Runs `f` with mutable access to this thread's C++ parser slot.
///
/// The slot is filled first when it is empty, so `f` sees `Some(parser)` on
/// entry. `f` receives the slot itself rather than the parser, which lets it
/// replace or remove the parser as well as use it.
///
/// # Errors
///
/// Returns an error if the C++ grammar cannot be installed on a newly created
/// parser.
fn with_cpp_parser_opt<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut Option<tree_sitter::Parser>) -> R,
{
    CPP_PARSER.with(|cell| {
        let mut guard = cell.borrow_mut();
        let slot: &mut Option<tree_sitter::Parser> = &mut guard;
        match slot {
            Some(_) => {}
            None => {
                // First use on this thread: build and store the parser.
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_cpp::language())?;
                *slot = Some(fresh);
            }
        }
        Ok(f(slot))
    })
}
/// Runs `f` with mutable access to this thread's Java parser slot.
///
/// The slot is filled first when it is empty, so `f` sees `Some(parser)` on
/// entry. `f` receives the slot itself rather than the parser, which lets it
/// replace or remove the parser as well as use it.
///
/// # Errors
///
/// Returns an error if the Java grammar cannot be installed on a newly created
/// parser.
fn with_java_parser_opt<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut Option<tree_sitter::Parser>) -> R,
{
    JAVA_PARSER.with(|cell| {
        let mut guard = cell.borrow_mut();
        let slot: &mut Option<tree_sitter::Parser> = &mut guard;
        match slot {
            Some(_) => {}
            None => {
                // First use on this thread: build and store the parser.
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_java::language())?;
                *slot = Some(fresh);
            }
        }
        Ok(f(slot))
    })
}
/// Runs `f` with mutable access to this thread's JavaScript parser slot.
///
/// The slot is filled first when it is empty, so `f` sees `Some(parser)` on
/// entry. `f` receives the slot itself rather than the parser, which lets it
/// replace or remove the parser as well as use it.
///
/// # Errors
///
/// Returns an error if the JavaScript grammar cannot be installed on a newly
/// created parser.
fn with_javascript_parser_opt<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut Option<tree_sitter::Parser>) -> R,
{
    JAVASCRIPT_PARSER.with(|cell| {
        let mut guard = cell.borrow_mut();
        let slot: &mut Option<tree_sitter::Parser> = &mut guard;
        match slot {
            Some(_) => {}
            None => {
                // First use on this thread: build and store the parser.
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_javascript::language())?;
                *slot = Some(fresh);
            }
        }
        Ok(f(slot))
    })
}
/// Runs `f` with mutable access to this thread's TypeScript parser slot.
///
/// The slot is filled first when it is empty, so `f` sees `Some(parser)` on
/// entry. `f` receives the slot itself rather than the parser, which lets it
/// replace or remove the parser as well as use it.
///
/// # Errors
///
/// Returns an error if the TypeScript grammar cannot be installed on a newly
/// created parser.
fn with_typescript_parser_opt<F, R>(f: F) -> Result<R>
where
    F: FnOnce(&mut Option<tree_sitter::Parser>) -> R,
{
    TYPESCRIPT_PARSER.with(|cell| {
        let mut guard = cell.borrow_mut();
        let slot: &mut Option<tree_sitter::Parser> = &mut guard;
        match slot {
            Some(_) => {}
            None => {
                // First use on this thread: build and store the parser.
                let mut fresh = tree_sitter::Parser::new();
                fresh.set_language(&tree_sitter_typescript::language_typescript())?;
                *slot = Some(fresh);
            }
        }
        Ok(f(slot))
    })
}
pub fn with_parser_opt<F, R>(language: Language, f: F) -> Result<R>
where
F: FnOnce(&mut Option<tree_sitter::Parser>) -> R,
{
match language {
Language::Rust => with_rust_parser_opt(f),
Language::Python => with_python_parser_opt(f),
Language::C => with_c_parser_opt(f),
Language::Cpp => with_cpp_parser_opt(f),
Language::Java => with_java_parser_opt(f),
Language::JavaScript => with_javascript_parser_opt(f),
Language::TypeScript => with_typescript_parser_opt(f),
}
}
pub fn with_parser<F, R>(language: Language, f: F) -> Result<R>
where
F: FnOnce(&mut tree_sitter::Parser) -> R,
{
match language {
Language::Rust => with_rust_parser(f),
Language::Python => with_python_parser(f),
Language::C => with_c_parser(f),
Language::Cpp => with_cpp_parser(f),
Language::Java => with_java_parser(f),
Language::JavaScript => with_javascript_parser(f),
Language::TypeScript => with_typescript_parser(f),
}
}
/// Creates and exercises every language parser on the calling thread.
///
/// A tiny snippet is parsed once per language through [`with_parser`], which
/// forces each thread-local parser to be constructed. Calling this repeatedly
/// is fine: later calls reuse the parsers that already exist.
///
/// # Errors
///
/// Returns the first error reported by [`with_parser`], i.e. a grammar that
/// could not be installed. A parse that produces no tree is not an error here;
/// the parse result is discarded.
pub fn warmup_parsers() -> Result<()> {
    // The explicit element type unifies byte-string literals of different
    // lengths into `&[u8]`.
    let samples: [(Language, &[u8]); 7] = [
        (Language::Rust, b"fn test() {}"),
        (Language::Python, b"def test(): pass"),
        (Language::C, b"int test() { return 0; }"),
        (Language::Cpp, b"void test() {}"),
        (Language::Java, b"class Test {}"),
        (Language::JavaScript, b"function test() {}"),
        (Language::TypeScript, b"function test(): void {}"),
    ];
    for (lang, source) in samples {
        // Only parser construction matters; the resulting tree (if any) is
        // dropped immediately.
        with_parser(lang, |parser| {
            let _ = parser.parse(source, None);
        })?;
    }
    Ok(())
}
/// Drops every parser cached for the current thread.
///
/// Each language slot is reset to `None`; the accessors in this module create
/// a new parser the next time that language is requested.
///
/// # Panics
///
/// Panics if a slot is currently borrowed, for example when called from inside
/// a closure passed to one of the parser accessors.
pub fn cleanup_parsers() {
    // Same order as the slot declarations: Rust, Python, C, C++, Java,
    // JavaScript, TypeScript.
    let slots = [
        &RUST_PARSER,
        &PYTHON_PARSER,
        &C_PARSER,
        &CPP_PARSER,
        &JAVA_PARSER,
        &JAVASCRIPT_PARSER,
        &TYPESCRIPT_PARSER,
    ];
    for slot in slots {
        slot.with(|cell| {
            cell.borrow_mut().take();
        });
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One small, valid snippet per supported language.
    const SAMPLES: [(Language, &[u8]); 7] = [
        (Language::Rust, b"fn test() {}"),
        (Language::Python, b"def test(): pass"),
        (Language::C, b"int test() { return 0; }"),
        (Language::Cpp, b"void test() {}"),
        (Language::Java, b"class Test {}"),
        (Language::JavaScript, b"function test() {}"),
        (Language::TypeScript, b"function test(): void {}"),
    ];

    /// Parses `source` as Rust on the current thread and reports whether a
    /// tree was produced.
    fn rust_parses(source: &[u8]) -> bool {
        with_parser(Language::Rust, |parser| parser.parse(source, None))
            .unwrap()
            .is_some()
    }

    /// Two consecutive calls on one thread must hand out the same parser.
    #[test]
    fn test_parser_reuse() {
        fn address(parser: &mut tree_sitter::Parser) -> usize {
            parser as *const tree_sitter::Parser as usize
        }
        let first = with_rust_parser(address).unwrap();
        let second = with_rust_parser(address).unwrap();
        assert_eq!(first, second, "Parser should be reused in same thread");
    }

    /// Every language variant yields a parser that can parse empty input.
    #[test]
    fn test_all_languages_have_parsers() {
        for lang in [
            Language::Rust,
            Language::Python,
            Language::C,
            Language::Cpp,
            Language::Java,
            Language::JavaScript,
            Language::TypeScript,
        ] {
            let outcome = with_parser(lang, |parser| parser.parse(b"", None).is_some());
            assert!(
                outcome.is_ok(),
                "Language {:?} should have a working parser",
                lang
            );
            assert!(
                outcome.unwrap(),
                "Language {:?} should parse successfully",
                lang
            );
        }
    }

    /// The parser works both when first created and when reused.
    #[test]
    fn test_parser_initialization() {
        let source = b"fn test() {}";
        for message in [
            "First parse should succeed",
            "Second parse should succeed with reused parser",
        ] {
            let parsed = with_parser(Language::Rust, |parser| {
                parser.parse(source, None).is_some()
            })
            .unwrap();
            assert!(parsed, "{}", message);
        }
    }

    /// Two threads can parse at the same time, each with its own parser.
    #[test]
    fn test_concurrent_access() {
        use std::sync::{Arc, Barrier};
        use std::thread;

        let source = b"fn test() {}";
        let gate = Arc::new(Barrier::new(2));
        let worker_gate = Arc::clone(&gate);
        let worker = thread::spawn(move || {
            worker_gate.wait();
            rust_parses(source)
        });
        gate.wait();
        let main_result = rust_parses(source);
        let thread_result = worker.join().unwrap();
        assert!(main_result, "Main thread parse should succeed");
        assert!(thread_result, "Spawned thread parse should succeed");
    }

    /// All languages can be used one after another on a single thread.
    #[test]
    fn test_multiple_languages_same_thread() {
        for (lang, source) in SAMPLES {
            let outcome = with_parser(lang, |parser| parser.parse(source, None).is_some());
            assert!(
                matches!(outcome, Ok(true)),
                "Language {:?} should parse successfully",
                lang
            );
        }
    }

    #[test]
    fn test_parse_simple_rust() {
        let source = b"pub fn hello() -> String { \"world\".to_string() }";
        let parsed = with_parser(Language::Rust, |parser| parser.parse(source, None)).unwrap();
        assert!(
            parsed.is_some(),
            "Simple Rust function should parse successfully"
        );
    }

    #[test]
    fn test_parse_simple_python() {
        let source = b"def hello():\n return \"world\"";
        let parsed = with_parser(Language::Python, |parser| parser.parse(source, None)).unwrap();
        assert!(
            parsed.is_some(),
            "Simple Python function should parse successfully"
        );
    }

    /// The unified entry point returns a tree rooted at a Rust `source_file`.
    #[test]
    fn test_with_parser_unified_api() {
        let parsed =
            with_parser(Language::Rust, |parser| parser.parse(b"struct Test;", None)).unwrap();
        assert!(parsed.is_some(), "Parser should successfully parse");
        let tree = parsed.unwrap();
        assert_eq!(tree.root_node().kind(), "source_file");
    }

    /// After a warmup every language still parses its sample.
    #[test]
    fn test_warmup_parsers() {
        warmup_parsers().expect("Parser warmup should succeed");
        for (lang, source) in SAMPLES {
            let outcome = with_parser(lang, |parser| parser.parse(source, None).is_some());
            assert!(
                matches!(outcome, Ok(true)),
                "Language {:?} should parse successfully after warmup",
                lang
            );
        }
    }

    /// Warming up repeatedly is harmless.
    #[test]
    fn test_warmup_multiple_calls() {
        for message in [
            "First warmup should succeed",
            "Second warmup should succeed",
            "Third warmup should succeed",
        ] {
            warmup_parsers().expect(message);
        }
    }
}