use crate::Result;
pub mod source_files;
pub mod source_lines;
pub mod symbols;
#[cfg(feature = "tree-sitter-ast")]
pub mod imports;
#[cfg(feature = "tree-sitter-ast")]
pub mod ast_nodes;
#[cfg(feature = "tree-sitter-ast")]
pub mod tree_sitter;
use std::fs;
use std::io::Read;
use std::path::Path;
use rusqlite::Connection;
/// Metadata describing a single file discovered by `walk_source_files`.
///
/// Every field is a best-effort value: when a piece of information cannot be
/// obtained, `walk_source_files` stores an empty string (or `0` for `size`)
/// instead of failing.
pub struct FileInfo {
/// File path with the repository root prefix stripped when possible
/// (lossily converted to UTF-8).
pub path: String,
/// Final path component; empty when the name is not valid UTF-8.
pub name: String,
/// Extension without the leading dot; empty when absent or not valid UTF-8.
pub extension: String,
/// Parent directory, rendered the same way as `path`; empty when there is no parent.
pub directory: String,
/// File length in bytes as reported by the file's metadata; `0` when metadata is unavailable.
pub size: u64,
/// Last-modification time in UTC formatted as `%Y-%m-%dT%H:%M:%SZ`;
/// empty when the timestamp is unavailable.
pub modified_at: String,
}
/// Byte-offset to line/column lookup table over a borrowed text.
///
/// Line and column numbers are 1-based. Columns count bytes, not characters.
/// Offsets past the end of the text are attributed to the last line.
pub struct LineIndex<'a> {
    /// The indexed text.
    text: &'a str,
    /// Byte offset at which each line begins. Always starts with `0` and is
    /// strictly increasing (one entry per line, no sentinel).
    line_starts: Vec<usize>,
}

impl<'a> LineIndex<'a> {
    /// Builds the index by recording the offset that follows every `'\n'`.
    pub fn new(text: &'a str) -> Self {
        // Only real line starts are stored. Appending `text.len()` as a
        // sentinel would duplicate the last entry for empty or
        // newline-terminated text and shift every lookup at end-of-text onto
        // a line that does not exist.
        let line_starts = std::iter::once(0)
            .chain(text.match_indices('\n').map(|(idx, _)| idx + 1))
            .collect();
        Self { text, line_starts }
    }

    /// Zero-based index of the line containing `byte`.
    fn line_index_for_byte(&self, byte: usize) -> usize {
        // Number of line starts at or before `byte`; at least 1 because the
        // first start is always 0.
        self.line_starts
            .partition_point(|&start| start <= byte)
            .saturating_sub(1)
    }

    /// Returns the 1-based line number containing the byte offset `byte`.
    pub fn line_for_byte(&self, byte: usize) -> usize {
        self.line_index_for_byte(byte) + 1
    }

    /// Returns the 1-based byte column of `byte` within its line.
    pub fn column_for_byte(&self, byte: usize) -> usize {
        let line_start = self
            .line_starts
            .get(self.line_index_for_byte(byte))
            .copied()
            .unwrap_or(0);
        byte.saturating_sub(line_start) + 1
    }

    /// Returns the 1-based line of the last byte in a range whose exclusive
    /// end is `end_byte`. An `end_byte` of `0` maps to line 1.
    pub fn inclusive_line_for_end(&self, end_byte: usize) -> usize {
        // `saturating_sub` folds the `end_byte == 0` case into offset 0,
        // which is always on line 1.
        self.line_for_byte(end_byte.saturating_sub(1))
    }

    /// Returns the text of the 1-based `line` without its trailing `'\n'`.
    ///
    /// Returns `""` for line `0` and for any line past the end of the text.
    pub fn line_text(&self, line: usize) -> &str {
        let Some(idx) = line.checked_sub(1) else {
            return "";
        };
        let Some(&start) = self.line_starts.get(idx) else {
            return "";
        };
        // The last line has no following start; it runs to the end of the text.
        let end = self
            .line_starts
            .get(idx + 1)
            .copied()
            .unwrap_or(self.text.len());
        self.text
            .get(start..end)
            .unwrap_or("")
            .trim_end_matches('\n')
    }

    /// Returns the text in the byte range `start..end`, clamping both bounds
    /// to the text length.
    ///
    /// Returns `""` when the clamped range is empty, inverted, or does not
    /// fall on UTF-8 character boundaries (instead of panicking).
    pub fn slice(&self, start: usize, end: usize) -> &str {
        let len = self.text.len();
        self.text.get(start.min(len)..end.min(len)).unwrap_or("")
    }

    /// Returns the full indexed text.
    pub fn text(&self) -> &'a str {
        self.text
    }
}
/// Heuristically decides whether the file at `path` is binary.
///
/// Performs a single read of up to 8192 bytes from the start of the file and
/// reports `true` when that chunk contains a NUL byte. A file that cannot be
/// opened or read is also reported as binary.
pub fn is_binary(path: &Path) -> bool {
    let mut head = [0u8; 8192];
    let probe = fs::File::open(path).and_then(|mut handle| handle.read(&mut head));
    match probe {
        Ok(len) => head[..len].iter().any(|&byte| byte == 0),
        // Unreadable files are treated as binary.
        Err(_) => true,
    }
}
/// Maps a file extension (without the leading dot) to a display name for its language.
///
/// The comparison is exact and case-sensitive; any extension not listed in
/// the table yields `"Other"`.
pub fn detect_language(ext: &str) -> &'static str {
    // Each row pairs the extensions that share a language with that language's name.
    const LANGUAGES: &[(&[&str], &str)] = &[
        (&["rs"], "Rust"),
        (&["ts", "mts", "cts"], "TypeScript"),
        (&["tsx"], "TSX"),
        (&["js", "mjs", "cjs"], "JavaScript"),
        (&["jsx"], "JSX"),
        (&["py"], "Python"),
        (&["go"], "Go"),
        (&["java"], "Java"),
        (&["c"], "C"),
        (&["cpp", "cc", "cxx"], "C++"),
        (&["h"], "C Header"),
        (&["hpp", "hxx"], "C++ Header"),
        (&["rb"], "Ruby"),
        (&["swift"], "Swift"),
        (&["kt", "kts"], "Kotlin"),
        (&["scala"], "Scala"),
        (&["cs"], "C#"),
        (&["php"], "PHP"),
        (&["lua"], "Lua"),
        (&["r", "R"], "R"),
        (&["sh", "bash", "zsh"], "Shell"),
        (&["sql"], "SQL"),
        (&["html", "htm"], "HTML"),
        (&["css"], "CSS"),
        (&["scss", "sass"], "SCSS"),
        (&["json"], "JSON"),
        (&["yaml", "yml"], "YAML"),
        (&["toml"], "TOML"),
        (&["xml"], "XML"),
        (&["md", "markdown"], "Markdown"),
        (&["zig"], "Zig"),
        (&["nim"], "Nim"),
        (&["ex", "exs"], "Elixir"),
        (&["erl", "hrl"], "Erlang"),
        (&["hs"], "Haskell"),
        (&["ml", "mli"], "OCaml"),
        (&["clj", "cljs"], "Clojure"),
        (&["dart"], "Dart"),
        (&["vue"], "Vue"),
        (&["svelte"], "Svelte"),
        (&["tf", "hcl"], "Terraform"),
        (&["proto"], "Protobuf"),
        (&["graphql", "gql"], "GraphQL"),
        (&["dockerfile"], "Dockerfile"),
        (&["makefile"], "Makefile"),
    ];

    LANGUAGES
        .iter()
        .find(|row| row.0.iter().any(|&known| known == ext))
        .map_or("Other", |row| row.1)
}
/// Reports whether a directory called `name` is on the list of directories
/// excluded from the walk.
///
/// The comparison is exact and case-sensitive against a fixed list of names.
pub fn should_skip_dir(name: &str) -> bool {
    const SKIPPED_DIRS: &[&str] = &[
        ".git",
        "node_modules",
        "target",
        "dist",
        "build",
        "vendor",
        "__pycache__",
        ".next",
        "coverage",
        ".cache",
        ".idea",
        ".vscode",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
        ".eggs",
        "venv",
        ".venv",
        "env",
    ];
    SKIPPED_DIRS.contains(&name)
}
/// Walks `repo_path` and returns a [`FileInfo`] for every non-binary regular file.
///
/// The walk is driven by `ignore::WalkBuilder` with `hidden(false)` and
/// `follow_links(false)`. Directories whose name satisfies
/// [`should_skip_dir`] are not entered, entries that fail to be read are
/// dropped, and files that [`is_binary`] reports as binary are left out.
/// Missing metadata degrades to `0` / empty strings rather than an error.
pub fn walk_source_files(repo_path: &Path) -> Vec<FileInfo> {
    // Renders a path relative to the repository root, falling back to the
    // path as given when it does not live under the root.
    let relative_to_repo = |p: &Path| -> String {
        p.strip_prefix(repo_path)
            .unwrap_or(p)
            .to_string_lossy()
            .to_string()
    };

    ignore::WalkBuilder::new(repo_path)
        .hidden(false)
        .follow_links(false)
        .filter_entry(|candidate| {
            let is_dir = candidate
                .file_type()
                .map(|kind| kind.is_dir())
                .unwrap_or(false);
            match candidate.file_name().to_str() {
                Some(dir_name) if is_dir => !should_skip_dir(dir_name),
                // Non-directories, and directories whose name is not valid
                // UTF-8, are always kept.
                _ => true,
            }
        })
        .build()
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .file_type()
                .map(|kind| kind.is_file())
                .unwrap_or(false)
        })
        .filter(|entry| !is_binary(entry.path()))
        .map(|entry| {
            let file_path = entry.path();

            let meta = entry.metadata().ok();
            let size = meta.as_ref().map_or(0, |m| m.len());
            let modified_at = meta
                .and_then(|m| m.modified().ok())
                .map(|stamp| {
                    chrono::DateTime::<chrono::Utc>::from(stamp)
                        .format("%Y-%m-%dT%H:%M:%SZ")
                        .to_string()
                })
                .unwrap_or_default();

            FileInfo {
                path: relative_to_repo(file_path),
                name: entry.file_name().to_str().unwrap_or("").to_string(),
                extension: file_path
                    .extension()
                    .and_then(|e| e.to_str())
                    .unwrap_or("")
                    .to_string(),
                directory: file_path
                    .parent()
                    .map(|parent| relative_to_repo(parent))
                    .unwrap_or_default(),
                size,
                modified_at,
            }
        })
        .collect()
}
/// Runs the loader for each table name in `tables`, in order.
///
/// Stops and returns the error of the first loader that fails. Names that do
/// not match a known table are ignored; `"imports"` and `"ast_nodes"` are only
/// recognised when the `tree-sitter-ast` feature is enabled.
pub fn load_all_code_tables(conn: &Connection, repo_path: &Path, tables: &[&str]) -> Result<()> {
    tables.iter().try_for_each(|&table_name| -> Result<()> {
        match table_name {
            "source_files" => source_files::load(conn, repo_path)?,
            "source_lines" => source_lines::load(conn, repo_path)?,
            "symbols" => symbols::load(conn, repo_path)?,
            #[cfg(feature = "tree-sitter-ast")]
            "imports" => imports::load(conn, repo_path)?,
            #[cfg(feature = "tree-sitter-ast")]
            "ast_nodes" => ast_nodes::load(conn, repo_path)?,
            // Unrecognised table names are skipped without error.
            _ => {}
        }
        Ok(())
    })
}