mod rust;
mod typescript;
mod javascript;
mod python;
mod go;
use std::cell::RefCell;
use std::path::{Path, PathBuf};
use smallvec::SmallVec;
use tree_sitter::{Node, Parser};
// One tree-sitter parser per grammar, stored per thread. Each parser is
// wrapped in a `RefCell` so the `with_*_parser` helpers can hand out a
// mutable borrow. Failing to load a grammar panics with the `expect` message.
thread_local! {
    /// Parser configured with the tree-sitter Rust grammar.
    static RUST_PARSER: RefCell<Parser> = RefCell::new({
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_rust::LANGUAGE.into())
            .expect("failed to load tree-sitter-rust grammar");
        parser
    });

    /// Parser configured with the tree-sitter TypeScript grammar.
    static TS_PARSER: RefCell<Parser> = RefCell::new({
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into())
            .expect("failed to load tree-sitter-typescript grammar");
        parser
    });

    /// Parser configured with the tree-sitter TSX grammar.
    static TSX_PARSER: RefCell<Parser> = RefCell::new({
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_typescript::LANGUAGE_TSX.into())
            .expect("failed to load tree-sitter-tsx grammar");
        parser
    });

    /// Parser configured with the tree-sitter Python grammar.
    static PYTHON_PARSER: RefCell<Parser> = RefCell::new({
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_python::LANGUAGE.into())
            .expect("failed to load tree-sitter-python grammar");
        parser
    });

    /// Parser configured with the tree-sitter Go grammar.
    static GO_PARSER: RefCell<Parser> = RefCell::new({
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_go::LANGUAGE.into())
            .expect("failed to load tree-sitter-go grammar");
        parser
    });
}
/// Runs `f` with mutable access to this thread's Rust parser and returns
/// whatever `f` returns.
///
/// # Panics
///
/// Panics if the parser is already mutably borrowed on this thread
/// (for example, when called re-entrantly from inside `f`).
pub(crate) fn with_rust_parser<F, R>(f: F) -> R
where
    F: FnOnce(&mut Parser) -> R,
{
    RUST_PARSER.with(|cell| {
        let mut parser = cell.borrow_mut();
        f(&mut parser)
    })
}
/// Runs `f` with mutable access to this thread's TypeScript parser and
/// returns whatever `f` returns.
///
/// # Panics
///
/// Panics if the parser is already mutably borrowed on this thread
/// (for example, when called re-entrantly from inside `f`).
pub(crate) fn with_ts_parser<F, R>(f: F) -> R
where
    F: FnOnce(&mut Parser) -> R,
{
    TS_PARSER.with(|cell| {
        let mut parser = cell.borrow_mut();
        f(&mut parser)
    })
}
/// Runs `f` with mutable access to this thread's TSX parser and returns
/// whatever `f` returns.
///
/// # Panics
///
/// Panics if the parser is already mutably borrowed on this thread
/// (for example, when called re-entrantly from inside `f`).
pub(crate) fn with_tsx_parser<F, R>(f: F) -> R
where
    F: FnOnce(&mut Parser) -> R,
{
    TSX_PARSER.with(|cell| {
        let mut parser = cell.borrow_mut();
        f(&mut parser)
    })
}
/// Runs `f` with mutable access to this thread's Python parser and returns
/// whatever `f` returns.
///
/// # Panics
///
/// Panics if the parser is already mutably borrowed on this thread
/// (for example, when called re-entrantly from inside `f`).
pub(crate) fn with_python_parser<F, R>(f: F) -> R
where
    F: FnOnce(&mut Parser) -> R,
{
    PYTHON_PARSER.with(|cell| {
        let mut parser = cell.borrow_mut();
        f(&mut parser)
    })
}
/// Runs `f` with mutable access to this thread's Go parser and returns
/// whatever `f` returns.
///
/// # Panics
///
/// Panics if the parser is already mutably borrowed on this thread
/// (for example, when called re-entrantly from inside `f`).
pub(crate) fn with_go_parser<F, R>(f: F) -> R
where
    F: FnOnce(&mut Parser) -> R,
{
    GO_PARSER.with(|cell| {
        let mut parser = cell.borrow_mut();
        f(&mut parser)
    })
}
/// Returns the first direct child of `node` whose `kind()` equals `kind`,
/// or `None` when no child matches.
pub(crate) fn find_child_by_kind<'a>(node: Node<'a>, kind: &str) -> Option<Node<'a>> {
    let mut cursor = node.walk();
    // Bind the result first so the child iterator (which borrows `cursor`)
    // is dropped before `cursor` itself goes out of scope.
    let found = node
        .children(&mut cursor)
        .find(|child| child.kind() == kind);
    found
}
/// Copies the part of `content` covered by `node`'s byte range into a new
/// `String`.
///
/// # Panics
///
/// Panics, like any `str` slice indexing, if the byte range is out of bounds
/// for `content` or does not fall on UTF-8 character boundaries.
pub(crate) fn node_text(node: Node, content: &str) -> String {
    let span = node.byte_range();
    let text: &str = &content[span];
    text.to_owned()
}
use thiserror::Error;
use crate::filter::Language;
use crate::tokens::count_tokens;
/// Visibility level attached to a declaration or field.
///
/// `Private` is the `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Visibility {
    Public,
    #[default]
    Private,
    Crate,
    Protected,
}

impl std::fmt::Display for Visibility {
    /// Writes the fixed textual label for the variant: `pub`, `private`,
    /// `pub(crate)` or `protected`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Public => "pub",
            Self::Private => "private",
            Self::Crate => "pub(crate)",
            Self::Protected => "protected",
        };
        f.write_str(label)
    }
}
/// Line span of a declaration, stored as a start line and an end line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Location {
    pub start_line: usize,
    pub end_line: usize,
}

impl Location {
    /// Builds a location from the given start and end lines.
    /// No ordering check is performed on the two values.
    pub fn new(start_line: usize, end_line: usize) -> Self {
        Location {
            start_line,
            end_line,
        }
    }

    /// Builds a location whose start and end are both `line`.
    pub fn single_line(line: usize) -> Self {
        Self::new(line, line)
    }
}
/// A single named field of a struct declaration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Field {
/// Field name.
pub name: String,
/// Field type, kept as text.
pub ty: String,
/// Visibility recorded for the field.
pub visibility: Visibility,
}
/// One declaration extracted from a source file.
///
/// Every variant carries a `name` and a `location`. `Trait` and `Interface`
/// carry no `visibility` field; `Declaration::visibility` reports them as
/// `Visibility::Public`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Declaration {
/// A function; `signature` is the text emitted when rendering.
Function {
name: String,
signature: String,
visibility: Visibility,
location: Location,
is_async: bool,
doc: Option<String>,
},
/// A struct with its fields and nested method declarations.
Struct {
name: String,
fields: SmallVec<[Field; 6]>,
visibility: Visibility,
location: Location,
methods: Vec<Declaration>, doc: Option<String>,
},
/// An enum with its variants kept as text.
Enum {
name: String,
variants: SmallVec<[String; 6]>,
visibility: Visibility,
location: Location,
doc: Option<String>,
},
/// A trait with its methods kept as text.
Trait {
name: String,
methods: SmallVec<[String; 8]>,
location: Location,
doc: Option<String>,
},
/// A type alias: `name` refers to `target`.
TypeAlias {
name: String,
target: String,
visibility: Visibility,
location: Location,
},
/// A constant with its type kept as text.
Const {
name: String,
ty: String,
visibility: Visibility,
location: Location,
},
/// An interface with its members kept as text.
Interface {
name: String,
members: SmallVec<[String; 8]>,
location: Location,
doc: Option<String>,
},
/// A class with nested member declarations.
Class {
name: String,
members: Vec<Declaration>, visibility: Visibility,
location: Location,
doc: Option<String>,
},
}
impl Declaration {
    /// Returns the declaration's name, whatever its variant.
    pub fn name(&self) -> &str {
        match self {
            Self::Function { name, .. }
            | Self::Struct { name, .. }
            | Self::Enum { name, .. }
            | Self::Trait { name, .. }
            | Self::TypeAlias { name, .. }
            | Self::Const { name, .. }
            | Self::Interface { name, .. }
            | Self::Class { name, .. } => name.as_str(),
        }
    }

    /// Returns the declaration's visibility.
    ///
    /// `Trait` and `Interface` store no visibility and are always reported
    /// as `Visibility::Public`.
    pub fn visibility(&self) -> Visibility {
        match self {
            Self::Trait { .. } | Self::Interface { .. } => Visibility::Public,
            Self::Function { visibility, .. }
            | Self::Struct { visibility, .. }
            | Self::Enum { visibility, .. }
            | Self::TypeAlias { visibility, .. }
            | Self::Const { visibility, .. }
            | Self::Class { visibility, .. } => *visibility,
        }
    }

    /// Returns the declaration's source location, whatever its variant.
    pub fn location(&self) -> Location {
        match self {
            Self::Function { location, .. }
            | Self::Struct { location, .. }
            | Self::Enum { location, .. }
            | Self::Trait { location, .. }
            | Self::TypeAlias { location, .. }
            | Self::Const { location, .. }
            | Self::Interface { location, .. }
            | Self::Class { location, .. } => *location,
        }
    }

    /// Returns `true` only when `visibility()` is exactly `Visibility::Public`.
    pub fn is_public(&self) -> bool {
        self.visibility() == Visibility::Public
    }
}
/// One import statement extracted from a source file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Import {
/// Import source text; this is the only part of an import that
/// `render_codemap_simple` writes out.
pub source: String,
/// Items associated with the import, kept as text.
pub items: SmallVec<[String; 4]>,
}
/// Extraction result for a single file: its imports and declarations, plus
/// a token count and an optional parse error.
#[derive(Debug, Clone)]
pub struct Codemap {
/// Path of the file this codemap describes.
pub path: PathBuf,
/// Language used to extract the file.
pub language: Language,
/// Imports found in the file.
pub imports: SmallVec<[Import; 8]>,
/// Top-level declarations; structs and classes nest their own members.
pub declarations: SmallVec<[Declaration; 16]>,
/// Token count; `extract_codemap` computes it from the simple rendering.
pub token_count: usize,
/// Error message when extraction failed, otherwise `None`.
pub parse_error: Option<String>,
}
impl Codemap {
    /// Creates a codemap with no imports, no declarations, a zero token
    /// count and no parse error.
    pub fn empty(path: PathBuf, language: Language) -> Self {
        let imports = SmallVec::new();
        let declarations = SmallVec::new();
        Self {
            path,
            language,
            imports,
            declarations,
            token_count: 0,
            parse_error: None,
        }
    }

    /// Creates an otherwise empty codemap whose `parse_error` is `error`.
    pub fn with_error(path: PathBuf, language: Language, error: String) -> Self {
        Self {
            parse_error: Some(error),
            ..Self::empty(path, language)
        }
    }

    /// Iterates over the top-level declarations for which `is_public()` holds.
    /// Nested members are not visited.
    pub fn public_only(&self) -> impl Iterator<Item = &Declaration> {
        self.declarations.iter().filter(|decl| decl.is_public())
    }

    /// Counts every declaration, including those nested (recursively) in
    /// struct `methods` and class `members`.
    pub fn declaration_count(&self) -> usize {
        fn count_nested(decl: &Declaration) -> usize {
            // Only structs and classes hold nested declarations.
            let children: &[Declaration] = match decl {
                Declaration::Struct { methods, .. } => methods.as_slice(),
                Declaration::Class { members, .. } => members.as_slice(),
                _ => &[],
            };
            1 + children.iter().map(count_nested).sum::<usize>()
        }

        self.declarations.iter().map(count_nested).sum()
    }
}
/// Flags passed to the per-language extractors.
///
/// The derived `Default` leaves both flags `false`.
#[derive(Debug, Clone, Default)]
pub struct ExtractOptions {
    pub include_docs: bool,
    pub include_private: bool,
}

impl ExtractOptions {
    /// Options with both `include_docs` and `include_private` enabled.
    pub fn with_docs() -> Self {
        ExtractOptions {
            include_private: true,
            include_docs: true,
        }
    }

    /// Options with both `include_docs` and `include_private` disabled.
    pub fn public_only() -> Self {
        ExtractOptions {
            include_private: false,
            include_docs: false,
        }
    }
}
/// Errors related to codemap extraction. The `#[error]` attributes give
/// each variant its display message.
#[derive(Debug, Error)]
pub enum CodemapError {
/// A parser for `language` could not be initialized.
#[error("failed to initialize {language} parser")]
ParserInit { language: Language },
/// Parsing `path` failed; `message` carries the details.
#[error("parse error in {path}: {message}")]
Parse { path: PathBuf, message: String },
/// No supported language applies to `path`.
#[error("unsupported language for file: {path}")]
UnsupportedLanguage { path: PathBuf },
/// Reading `path` failed; the underlying I/O error is kept as the source.
#[error("failed to read file: {path}")]
ReadFailed {
path: PathBuf,
#[source]
source: std::io::Error,
},
}
/// Builds the [`Codemap`] for one file.
///
/// Dispatches `content` to the extractor for `language`. On success the
/// extracted imports and declarations are stored; on failure the error is
/// stored in `parse_error` and both lists stay empty. Either way
/// `token_count` is computed by running `count_tokens` on the simple
/// rendering of the resulting codemap.
pub fn extract_codemap(
    path: &Path,
    content: &str,
    language: Language,
    options: &ExtractOptions,
) -> Codemap {
    let extracted = match language {
        Language::Rust => rust::extract(content, options),
        Language::TypeScript | Language::Tsx => typescript::extract(content, language, options),
        Language::JavaScript | Language::Jsx => javascript::extract(content, language, options),
        Language::Python => python::extract(content, options),
        Language::Go => go::extract(content, options),
    };

    let mut codemap = Codemap::empty(path.to_path_buf(), language);
    match extracted {
        Ok((imports, declarations)) => {
            codemap.imports = imports.into();
            codemap.declarations = declarations.into();
        }
        Err(message) => codemap.parse_error = Some(message),
    }

    codemap.token_count = count_tokens(&render_codemap_simple(&codemap));
    codemap
}
/// Renders a codemap as plain text: one line per import (its `source`
/// only), followed by the simple rendering of every top-level declaration.
fn render_codemap_simple(codemap: &Codemap) -> String {
    let mut rendered = String::with_capacity(1024);

    codemap.imports.iter().for_each(|import| {
        rendered.push_str(&import.source);
        rendered.push('\n');
    });
    codemap
        .declarations
        .iter()
        .for_each(|decl| render_declaration_simple(&mut rendered, decl));

    rendered
}
/// Appends a compact textual form of `decl` to `output`.
///
/// Functions emit their signature; structs emit their fields on one line and
/// then their methods; enums, traits and interfaces emit a single
/// brace-wrapped line; classes wrap their recursively rendered members in
/// braces. Every emitted line ends with `\n`.
fn render_declaration_simple(output: &mut String, decl: &Declaration) {
    // Appends `<keyword><name> { <body> }` plus a newline. `keyword` is
    // expected to carry its own trailing space.
    fn push_braced(out: &mut String, keyword: &str, name: &str, body: &str) {
        out.extend([keyword, name, " { ", body, " }\n"]);
    }

    match decl {
        Declaration::Function { signature, .. } => {
            output.extend([signature.as_str(), "\n"]);
        }
        Declaration::Struct { name, fields, methods, .. } => {
            output.extend(["struct ", name.as_str(), " { "]);
            for field in fields.iter() {
                // Every field, including the last, is followed by ", ".
                output.extend([field.name.as_str(), ": ", field.ty.as_str(), ", "]);
            }
            output.push_str("}\n");
            for method in methods.iter() {
                render_declaration_simple(output, method);
            }
        }
        Declaration::Enum { name, variants, .. } => {
            push_braced(output, "enum ", name, &variants.join(", "));
        }
        Declaration::Trait { name, methods, .. } => {
            push_braced(output, "trait ", name, &methods.join("; "));
        }
        Declaration::TypeAlias { name, target, .. } => {
            output.extend(["type ", name.as_str(), " = ", target.as_str(), "\n"]);
        }
        Declaration::Const { name, ty, .. } => {
            output.extend(["const ", name.as_str(), ": ", ty.as_str(), "\n"]);
        }
        Declaration::Interface { name, members, .. } => {
            push_braced(output, "interface ", name, &members.join("; "));
        }
        Declaration::Class { name, members, .. } => {
            output.extend(["class ", name.as_str(), " {\n"]);
            for member in members.iter() {
                render_declaration_simple(output, member);
            }
            output.push_str("}\n");
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `Codemap::empty` yields no imports, no declarations and no error.
    #[test]
    fn test_empty_codemap() {
        let codemap = Codemap::empty(PathBuf::from("test.rs"), Language::Rust);
        assert!(codemap.imports.is_empty());
        assert!(codemap.declarations.is_empty());
        assert!(codemap.parse_error.is_none());
    }

    /// `Codemap::with_error` records the supplied error message.
    #[test]
    fn test_codemap_with_error() {
        let codemap = Codemap::with_error(
            PathBuf::from("test.rs"),
            Language::Rust,
            "parse error".into(),
        );
        assert!(codemap.parse_error.is_some());
    }

    /// `Declaration::name` returns the name stored in the variant.
    #[test]
    fn test_declaration_name() {
        let func = Declaration::Function {
            name: "test".into(),
            signature: "fn test()".into(),
            visibility: Visibility::Public,
            location: Location::single_line(1),
            is_async: false,
            doc: None,
        };
        assert_eq!(func.name(), "test");
    }

    /// Every `Visibility` variant displays as its fixed label.
    #[test]
    fn test_visibility_display() {
        assert_eq!(Visibility::Public.to_string(), "pub");
        assert_eq!(Visibility::Private.to_string(), "private");
        assert_eq!(Visibility::Crate.to_string(), "pub(crate)");
        // Previously unchecked: the fourth variant.
        assert_eq!(Visibility::Protected.to_string(), "protected");
    }
}