use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use tree_sitter::{Language, Parser, Tree};
use crate::error::TldrError;
use crate::types::Language as TldrLanguage;
use crate::TldrResult;
/// Largest source text, in bytes, that `ParserPool::parse` accepts (5 MiB).
pub const MAX_PARSE_SIZE: usize = 5 * (1 << 20);
/// Cache of tree-sitter parsers, keyed by language.
///
/// The map sits behind a `Mutex`, so a single pool can be shared between
/// threads (see the global `PARSER_POOL`).
pub struct ParserPool {
/// Parsers created so far, at most one entry per language.
parsers: Mutex<HashMap<TldrLanguage, Parser>>,
}
impl ParserPool {
pub fn new() -> Self {
Self {
parsers: Mutex::new(HashMap::new()),
}
}
pub fn get_ts_language(lang: TldrLanguage) -> Option<Language> {
match lang {
TldrLanguage::Python => Some(tree_sitter_python::LANGUAGE.into()),
TldrLanguage::TypeScript | TldrLanguage::JavaScript => {
Some(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into())
}
TldrLanguage::Go => Some(tree_sitter_go::LANGUAGE.into()),
TldrLanguage::Rust => Some(tree_sitter_rust::LANGUAGE.into()),
TldrLanguage::Java => Some(tree_sitter_java::LANGUAGE.into()),
TldrLanguage::C => Some(tree_sitter_c::LANGUAGE.into()),
TldrLanguage::Cpp => Some(tree_sitter_cpp::LANGUAGE.into()),
TldrLanguage::Ruby => Some(tree_sitter_ruby::LANGUAGE.into()),
TldrLanguage::CSharp => Some(tree_sitter_c_sharp::LANGUAGE.into()),
TldrLanguage::Scala => Some(tree_sitter_scala::LANGUAGE.into()),
TldrLanguage::Php => Some(tree_sitter_php::LANGUAGE_PHP.into()),
TldrLanguage::Lua => Some(tree_sitter_lua::LANGUAGE.into()),
TldrLanguage::Luau => Some(tree_sitter_luau::LANGUAGE.into()),
TldrLanguage::Elixir => Some(tree_sitter_elixir::LANGUAGE.into()),
TldrLanguage::Ocaml => Some(tree_sitter_ocaml::LANGUAGE_OCAML.into()),
TldrLanguage::Kotlin => Some(tree_sitter_kotlin_ng::LANGUAGE.into()),
TldrLanguage::Swift => Some(tree_sitter_swift::LANGUAGE.into()),
}
}
pub fn parse(&self, source: &str, lang: TldrLanguage) -> TldrResult<Tree> {
if source.len() > MAX_PARSE_SIZE {
return Err(TldrError::ParseError {
file: std::path::PathBuf::from("<source>"),
line: None,
message: format!(
"File too large: {} bytes (max {})",
source.len(),
MAX_PARSE_SIZE
),
});
}
let ts_lang = Self::get_ts_language(lang)
.ok_or_else(|| TldrError::UnsupportedLanguage(lang.to_string()))?;
let mut parsers = self.parsers.lock().unwrap();
let parser = parsers.entry(lang).or_insert_with(|| {
let mut p = Parser::new();
p.set_language(&ts_lang).expect("Error loading grammar");
p
});
parser
.set_language(&ts_lang)
.map_err(|e| TldrError::ParseError {
file: std::path::PathBuf::from("<source>"),
line: None,
message: format!("Failed to set language: {}", e),
})?;
parser
.parse(source, None)
.ok_or_else(|| TldrError::ParseError {
file: std::path::PathBuf::from("<source>"),
line: None,
message: "Parsing returned None".to_string(),
})
}
pub fn parse_file(&self, path: &std::path::Path) -> TldrResult<(Tree, String, TldrLanguage)> {
let lang = TldrLanguage::from_path(path).ok_or_else(|| {
let ext = path
.extension()
.map(|e| e.to_string_lossy().to_string())
.unwrap_or_else(|| "unknown".to_string());
TldrError::UnsupportedLanguage(ext)
})?;
let bytes = std::fs::read(path).map_err(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
TldrError::PathNotFound(path.to_path_buf())
} else if e.kind() == std::io::ErrorKind::PermissionDenied {
TldrError::PermissionDenied(path.to_path_buf())
} else {
TldrError::IoError(e)
}
})?;
let source = String::from_utf8_lossy(&bytes).to_string();
let tree = self.parse(&source, lang).map_err(|e| {
if let TldrError::ParseError { line, message, .. } = e {
TldrError::ParseError {
file: path.to_path_buf(),
line,
message,
}
} else {
e
}
})?;
Ok((tree, source, lang))
}
}
impl Default for ParserPool {
fn default() -> Self {
Self::new()
}
}
lazy_static::lazy_static! {
    /// Process-wide parser pool used by the free `parse` and `parse_file`
    /// functions; initialised on first access.
    pub static ref PARSER_POOL: Arc<ParserPool> = Arc::new(ParserPool::default());
}
pub fn parse(source: &str, lang: TldrLanguage) -> TldrResult<Tree> {
PARSER_POOL.parse(source, lang)
}
pub fn parse_file(path: &std::path::Path) -> TldrResult<(Tree, String, TldrLanguage)> {
PARSER_POOL.parse_file(path)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` through the global pool and checks the root node kind.
    fn assert_root_kind(source: &str, lang: TldrLanguage, expected: &str) {
        let tree = parse(source, lang).unwrap();
        assert_eq!(tree.root_node().kind(), expected);
    }

    #[test]
    fn test_parse_python() {
        assert_root_kind("def foo(): pass", TldrLanguage::Python, "module");
    }

    #[test]
    fn test_parse_typescript() {
        assert_root_kind("function foo() {}", TldrLanguage::TypeScript, "program");
    }

    #[test]
    fn test_parse_go() {
        assert_root_kind("package main\nfunc foo() {}", TldrLanguage::Go, "source_file");
    }

    #[test]
    fn test_parse_rust() {
        assert_root_kind("fn foo() {}", TldrLanguage::Rust, "source_file");
    }

    #[test]
    fn test_swift_now_supported() {
        match parse("let x = 1", TldrLanguage::Swift) {
            Ok(tree) => assert_eq!(tree.root_node().kind(), "source_file"),
            // `Some(..)` keeps the failure message identical to formatting `result.err()`.
            Err(err) => panic!("Swift should now parse successfully: {:?}", Some(err)),
        }
    }

    #[test]
    fn test_parser_reuse() {
        let pool = ParserPool::new();
        (0..5).for_each(|_| {
            pool.parse("def foo(): pass", TldrLanguage::Python).unwrap();
        });
        // Repeated parses of one language must leave exactly one cached parser.
        let cached = pool.parsers.lock().unwrap().len();
        assert_eq!(cached, 1);
    }
}