pub mod cpp;
pub mod csharp;
pub mod css;
pub mod dart;
pub mod go;
pub mod html;
pub mod java;
pub mod php;
pub mod python;
pub mod rust_lang;
pub mod shared;
pub mod swift;
pub mod typescript;
use crate::code_tree::models::ParseResult;
use rayon::prelude::*;
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use walkdir::WalkDir;
/// Stack size, in bytes, given to every worker thread of the parser pool
/// (16 MiB).
///
/// NOTE(review): presumably enlarged so that recursive traversal of deeply
/// nested sources does not overflow the stack — confirm with the parsers.
const PARSER_THREAD_STACK_SIZE: usize = 16 << 20;
/// Returns the shared rayon thread pool used for parsing, creating it on the
/// first call.
///
/// Worker threads are named `kglite-parser-<index>` and each gets a stack of
/// `PARSER_THREAD_STACK_SIZE` bytes.
///
/// # Panics
///
/// Panics if the thread pool cannot be built.
fn parser_pool() -> &'static rayon::ThreadPool {
    static POOL: OnceLock<rayon::ThreadPool> = OnceLock::new();

    /// Builds the pool; only ever invoked through `POOL.get_or_init`.
    fn build_pool() -> rayon::ThreadPool {
        let builder = rayon::ThreadPoolBuilder::new()
            .stack_size(PARSER_THREAD_STACK_SIZE)
            .thread_name(|index| format!("kglite-parser-{index}"));
        builder
            .build()
            .expect("failed to build kglite parser thread pool")
    }

    POOL.get_or_init(build_pool)
}
/// Table of `(file extension, language name)` pairs.
///
/// Extensions are written without the leading dot. `language_for_path`
/// scans this table in order and compares entries for exact equality, so
/// the match is case-sensitive.
///
/// The language names are the same strings that `get_parser` accepts.
pub const EXTENSION_MAP: &[(&str, &str)] = &[
// Rust
("rs", "rust"),
// Python
("py", "python"),
("pyi", "python"),
// TypeScript
("ts", "typescript"),
("tsx", "typescript"),
// JavaScript
("js", "javascript"),
("jsx", "javascript"),
("mjs", "javascript"),
// Go
("go", "go"),
// Java
("java", "java"),
// C#
("cs", "csharp"),
// C (note that `.h` headers are attributed to C, not C++)
("c", "c"),
("h", "c"),
// C++
("cpp", "cpp"),
("cc", "cpp"),
("cxx", "cpp"),
("hpp", "cpp"),
("hh", "cpp"),
("hxx", "cpp"),
// Swift
("swift", "swift"),
// PHP
("php", "php"),
// HTML
("html", "html"),
("htm", "html"),
// CSS
("css", "css"),
// Dart
("dart", "dart"),
];
/// Looks up the language name for `path` based on its file extension.
///
/// Returns `None` when the path has no extension, when the extension is not
/// valid UTF-8, or when no entry of `EXTENSION_MAP` equals it exactly
/// (the comparison is case-sensitive).
pub fn language_for_path(path: &Path) -> Option<&'static str> {
    let suffix = path.extension().and_then(|raw| raw.to_str())?;
    for &(known, language) in EXTENSION_MAP {
        if known == suffix {
            return Some(language);
        }
    }
    None
}
/// Interface implemented by every per-language parser.
///
/// Implementors provide the language metadata and
/// [`parse_file`](LanguageParser::parse_file); batch and directory parsing
/// are supplied as default methods built on top of it. The `Sync` bound lets
/// one parser be shared by the worker threads that `parse_files` uses.
pub trait LanguageParser: Sync {
    /// Name of the language, as printed in the verbose file-count message.
    fn language_name(&self) -> &'static str;

    /// File extensions (no leading dot) that `parse_directory` hands to this
    /// parser. Matching is exact and case-sensitive.
    fn file_extensions(&self) -> &'static [&'static str];

    /// Names the parser considers noise; empty unless overridden.
    ///
    /// NOTE(review): how callers use this list is not visible here —
    /// presumably identifiers to be ignored downstream; confirm.
    fn noise_names(&self) -> &'static [&'static str] {
        &[]
    }

    /// Parses one file located under `src_root` and returns its result.
    fn parse_file(&self, filepath: &Path, src_root: &Path) -> ParseResult;

    /// Parses `files` in parallel on the pool returned by `parser_pool()` and
    /// merges the per-file results into one `ParseResult`.
    ///
    /// An empty `files` slice yields `ParseResult::new()`.
    fn parse_files(&self, files: &[PathBuf], src_root: &Path) -> ParseResult {
        let parse_all = || {
            files
                .par_iter()
                .map(|file| self.parse_file(file, src_root))
                .reduce(ParseResult::new, |mut merged, next| {
                    merged.merge(next);
                    merged
                })
        };
        parser_pool().install(parse_all)
    }

    /// Walks `src_root` recursively, collects every regular file whose
    /// extension is listed in `file_extensions()`, and parses them with
    /// `parse_files`.
    ///
    /// Directory entries that cannot be read are skipped silently. When
    /// `verbose` is set, the number of matching files is written to stderr.
    fn parse_directory(&self, src_root: &Path, verbose: bool) -> ParseResult {
        let mut matched: Vec<PathBuf> = Vec::new();
        for entry in WalkDir::new(src_root).into_iter().filter_map(Result::ok) {
            if !entry.file_type().is_file() {
                continue;
            }
            let candidate = entry.path();
            // Files without an extension, or with a non-UTF-8 one, never match.
            let Some(ext) = candidate.extension().and_then(|raw| raw.to_str()) else {
                continue;
            };
            if self.file_extensions().contains(&ext) {
                matched.push(candidate.to_path_buf());
            }
        }

        if verbose {
            eprintln!(" Found {} {} files", matched.len(), self.language_name());
        }

        self.parse_files(&matched, src_root)
    }
}
/// Walks `src_root` recursively and returns the languages of the files found
/// there, as decided by `language_for_path`.
///
/// Each language appears once and the list is sorted, because the names are
/// gathered in a `BTreeSet`. Directory entries that cannot be read and files
/// with unrecognised extensions are ignored.
pub fn detect_languages(src_root: &Path) -> Vec<&'static str> {
    let found: std::collections::BTreeSet<&'static str> = WalkDir::new(src_root)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
        .filter_map(|entry| language_for_path(entry.path()))
        .collect();
    found.into_iter().collect()
}
/// Creates the parser registered for `language`, or `None` if the name is
/// not recognised.
///
/// Names are matched exactly. `"typescript"` and `"javascript"` are both
/// served by `typescript::JstsParser`, and `"c"` and `"cpp"` by
/// `cpp::CppParser`, each through a dedicated constructor.
pub fn get_parser(language: &str) -> Option<Box<dyn LanguageParser + Send + Sync>> {
    let parser: Box<dyn LanguageParser + Send + Sync> = match language {
        "python" => Box::new(python::PythonParser::new()),
        "rust" => Box::new(rust_lang::RustParser::new()),
        "typescript" => Box::new(typescript::JstsParser::typescript()),
        "javascript" => Box::new(typescript::JstsParser::javascript()),
        "go" => Box::new(go::GoParser::new()),
        "java" => Box::new(java::JavaParser::new()),
        "csharp" => Box::new(csharp::CSharpParser::new()),
        "c" => Box::new(cpp::CppParser::c()),
        "cpp" => Box::new(cpp::CppParser::cpp()),
        "swift" => Box::new(swift::SwiftParser::new()),
        "php" => Box::new(php::PhpParser::new()),
        "html" => Box::new(html::HtmlParser::new()),
        "css" => Box::new(css::CssParser::new()),
        "dart" => Box::new(dart::DartParser::new()),
        // Unknown language name: there is nothing to construct.
        _ => return None,
    };
    Some(parser)
}
pub fn get_parsers_for_directory(src_root: &Path) -> Vec<Box<dyn LanguageParser + Send + Sync>> {
detect_languages(src_root)
.into_iter()
.filter_map(get_parser)
.collect()
}