pub mod lang;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicUsize;
pub use lang::{Lang, detect};
/// 2 MiB, expressed in bytes.
// NOTE(review): nothing in this file reads this constant; presumably callers pass it
// to `walk` as `max_bytes` — confirm against the call sites.
pub const MAX_FILE_BYTES: u64 = 2 * 1024 * 1024;
/// Maximum number of characters `first_line` keeps before it cuts the text and appends "…".
const MAX_SIGNATURE_CHARS: usize = 240;
/// Maximum number of characters `clamp_doc` keeps before it cuts the text and appends "…".
const MAX_DOC_CHARS: usize = 240;
/// A definition tag extracted from a source file by `parse_source`.
#[derive(Debug, Clone)]
pub struct Def {
/// Definition kind after `normalize_def_kind` has been applied to the tag's syntax type name.
pub kind: String,
/// Text of the tag's name range, decoded lossily as UTF-8.
pub name: String,
/// 1-based line containing the first byte of the tag's range.
pub line_start: usize,
/// 1-based line containing the last byte of the tag's range.
pub line_end: usize,
/// Trimmed text from the start of the tag's range to the next newline,
/// cut to `MAX_SIGNATURE_CHARS` characters plus "…" when longer.
pub signature: String,
/// First non-blank line of the tag's docs (trimmed and length-capped), if the tag carried any.
pub doc: Option<String>,
}
/// A non-definition (reference) tag extracted from a source file by `parse_source`.
#[derive(Debug, Clone)]
pub struct Ref {
/// Syntax type name reported by the tags configuration, stored as-is (not normalized).
pub kind: String,
/// Text of the tag's name range, decoded lossily as UTF-8.
pub name: String,
/// Start row of the tag's span plus one.
// NOTE(review): presumably the row is zero-based, which makes this a 1-based line — confirm.
pub line: usize,
}
/// Everything `parse_source` extracts from one file; the `Default` value has no entries.
#[derive(Debug, Clone, Default)]
pub struct ParsedFile {
/// Definition tags, in the order the tag iterator produced them.
pub defs: Vec<Def>,
/// Reference tags, in the order the tag iterator produced them.
pub refs: Vec<Ref>,
}
/// Returns the BLAKE3 hash of `bytes`, rendered as a hexadecimal string.
pub fn hash_bytes(bytes: &[u8]) -> String {
    let digest = blake3::hash(bytes);
    digest.to_hex().to_string()
}
/// Folds a raw definition kind into a smaller canonical vocabulary.
///
/// Known aliases are rewritten (`macro` → `function`; `interface`, `struct`,
/// `type`, `enum`, `trait` → `class`; `namespace` → `module`). Every other
/// value, including the canonical names themselves, is returned unchanged.
fn normalize_def_kind(raw: &str) -> &str {
    // (alias, canonical) pairs; anything not listed maps to itself.
    const ALIASES: [(&str, &str); 7] = [
        ("macro", "function"),
        ("interface", "class"),
        ("struct", "class"),
        ("type", "class"),
        ("enum", "class"),
        ("trait", "class"),
        ("namespace", "module"),
    ];
    for &(alias, canonical) in ALIASES.iter() {
        if alias == raw {
            return canonical;
        }
    }
    raw
}
fn first_line(source: &[u8], start: usize) -> String {
let end = source[start..]
.iter()
.position(|&b| b == b'\n')
.map(|p| start + p)
.unwrap_or(source.len());
let mut s = String::from_utf8_lossy(&source[start..end]).trim().to_string();
if s.chars().count() > MAX_SIGNATURE_CHARS {
s = s.chars().take(MAX_SIGNATURE_CHARS).collect::<String>() + "…";
}
s
}
fn clamp_doc(doc: &str) -> Option<String> {
let line = doc.lines().find(|l| !l.trim().is_empty())?.trim();
if line.is_empty() {
return None;
}
let s = if line.chars().count() > MAX_DOC_CHARS {
line.chars().take(MAX_DOC_CHARS).collect::<String>() + "…"
} else {
line.to_string()
};
Some(s)
}
/// Extracts definition and reference tags from `source` using the tags
/// configuration that `lang::config` returns for `lang`.
///
/// Returns an empty [`ParsedFile`] when there is no configuration for `lang`
/// or when tag generation fails. Tags that the iterator reports as errors,
/// and tags whose name is empty, are skipped.
pub fn parse_source(lang: Lang, source: &[u8]) -> ParsedFile {
    let config = match lang::config(lang) {
        Some(found) => found,
        None => return ParsedFile::default(),
    };

    let mut tagger = tree_sitter_tags::TagsContext::new();
    // Flag handed to the tagger; nothing in this function ever modifies it.
    let cancel_flag = AtomicUsize::new(0);
    let Ok((tags, _failed)) = tagger.generate_tags(config, source, Some(&cancel_flag)) else {
        return ParsedFile::default();
    };

    // Byte offset at which each line begins: 0, then one past every '\n'.
    let mut line_starts = vec![0usize];
    line_starts.extend(
        source
            .iter()
            .enumerate()
            .filter_map(|(idx, &byte)| if byte == b'\n' { Some(idx + 1) } else { None }),
    );
    // The number of line starts at or before `offset` is its 1-based line number.
    let line_of = |offset: usize| -> usize {
        line_starts
            .partition_point(|&begin| begin <= offset)
            .max(1)
    };

    let mut parsed = ParsedFile::default();
    for tag in tags.flatten() {
        let name = String::from_utf8_lossy(&source[tag.name_range.clone()]).into_owned();
        if name.is_empty() {
            continue;
        }
        let kind = config.syntax_type_name(tag.syntax_type_id).to_string();

        if !tag.is_definition {
            parsed.refs.push(Ref {
                kind,
                name,
                line: tag.span.start.row + 1,
            });
            continue;
        }

        let span = &tag.range;
        // The range end is exclusive; step back one byte (never before the
        // start) so the end line is the one holding the last byte of the range.
        let last_byte = span.end.saturating_sub(1).max(span.start);
        parsed.defs.push(Def {
            kind: normalize_def_kind(&kind).to_string(),
            name,
            line_start: line_of(span.start),
            line_end: line_of(last_byte),
            signature: first_line(source, span.start),
            doc: tag.docs.as_deref().and_then(clamp_doc),
        });
    }
    parsed
}
/// Collects the files at or under `root` that `detect` recognizes and whose
/// size is at most `max_bytes`.
///
/// When `root` is itself a file, the result holds that one path if it
/// qualifies and is empty otherwise. For any other `root`, the tree is walked
/// with the `ignore` crate's standard filters (hidden files, gitignore rules;
/// no git repository required), and entries named `target`, `node_modules`,
/// `.git`, `dist` or `build` are excluded. Walk errors and files whose
/// metadata cannot be read are skipped.
pub fn walk(root: &Path, max_bytes: u64) -> Vec<PathBuf> {
    // Unreadable metadata counts as "over the limit".
    let within_limit =
        |path: &Path| matches!(std::fs::metadata(path), Ok(meta) if meta.len() <= max_bytes);

    if root.is_file() {
        return if detect(root).is_some() && within_limit(root) {
            vec![root.to_path_buf()]
        } else {
            Vec::new()
        };
    }

    ignore::WalkBuilder::new(root)
        .standard_filters(true)
        .hidden(true)
        .git_ignore(true)
        .git_global(true)
        .require_git(false)
        .filter_entry(|entry| {
            let file_name = entry.file_name().to_string_lossy();
            match file_name.as_ref() {
                "target" | "node_modules" | ".git" | "dist" | "build" => false,
                _ => true,
            }
        })
        .build()
        .flatten()
        .filter(|entry| {
            let candidate = entry.path();
            candidate.is_file() && detect(candidate).is_some() && within_limit(candidate)
        })
        .map(|entry| entry.path().to_path_buf())
        .collect()
}