use crate::io::ExitCode;
use serde::Serialize;
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use thiserror::Error;
/// Errors returned by `execute_parse`, `ParseOutput`, and `walk_and_stream`.
///
/// Every `Display` message ends with a `Suggestion:` line aimed at the user;
/// variants that wrap an underlying error also print it through `{source}`.
#[derive(Error, Debug)]
pub enum ParseError {
/// The input path was reported as not existing.
#[error("File not found: {path}\nSuggestion: Check if the file exists and the path is correct")]
FileNotFound { path: String },
/// No language could be derived from the file extension. `extension` is the
/// empty string when the path has no extension or it is not valid UTF-8.
#[error(
"Unable to detect language from file extension: {extension}\nSuggestion: Use a supported file extension (rs, py, ts, tsx, js, jsx, php, go, c, clj, cpp, cs, gd, java, kt, lua, swift)"
)]
UnsupportedLanguage { extension: String },
/// Reading the input file's bytes failed.
#[error(
"Failed to read file: {path}\n{source}\nSuggestion: Check file permissions and ensure it's a valid text file"
)]
FileReadError {
path: String,
#[source]
source: std::io::Error,
},
/// Creating the requested output file failed.
#[error(
"Failed to create output file: {path}\n{source}\nSuggestion: Check write permissions for the directory"
)]
OutputCreateError {
path: String,
#[source]
source: std::io::Error,
},
/// Writing or flushing a serialized node to the output sink failed.
#[error(
"Failed to write output\n{source}\nSuggestion: Check disk space and output file permissions"
)]
OutputWriteError {
#[source]
source: std::io::Error,
},
/// `tree_sitter::Parser::set_language` rejected the selected grammar.
#[error(
"Failed to set parser language\n{source}\nSuggestion: This is an internal error, please report it"
)]
LanguageSetupError {
#[source]
source: tree_sitter::LanguageError,
},
/// `tree_sitter::Parser::parse` returned `None` instead of a tree.
#[error(
"Failed to parse file\nSuggestion: Check if the file has valid syntax for the detected language"
)]
ParseFailure,
/// Serializing a `NodeInfo` to JSON failed. `#[from]` lets `?` convert a
/// `serde_json::Error` into this variant automatically.
#[error(
"Failed to serialize node to JSON\n{source}\nSuggestion: This is an internal error, please report it"
)]
SerializationError {
#[from]
source: serde_json::Error,
},
}
impl ParseError {
    /// Returns the process exit code that corresponds to this error.
    ///
    /// Variants are grouped by outcome: all three I/O failures share
    /// `ExitCode::IoError`, and both parser-side failures share
    /// `ExitCode::ParseError`. The match has no wildcard arm, so adding a new
    /// variant forces an explicit decision here.
    pub fn exit_code(&self) -> ExitCode {
        match self {
            Self::FileNotFound { .. } => ExitCode::NotFound,
            Self::UnsupportedLanguage { .. } => ExitCode::UnsupportedOperation,
            Self::FileReadError { .. }
            | Self::OutputCreateError { .. }
            | Self::OutputWriteError { .. } => ExitCode::IoError,
            Self::LanguageSetupError { .. } | Self::ParseFailure => ExitCode::ParseError,
            Self::SerializationError { .. } => ExitCode::GeneralError,
        }
    }
}
/// One syntax-tree node as it is serialized to the output (one JSON object per line).
///
/// Field order is the order serde emits the keys in, so it is part of the
/// output format.
#[derive(Debug, Serialize)]
pub struct NodeInfo {
/// Node kind string, taken from `tree_sitter::Node::kind`.
pub node: String,
/// Start position as `[row, column]`, as reported by tree-sitter.
pub start: [usize; 2],
/// End position as `[row, column]`, as reported by tree-sitter.
pub end: [usize; 2],
/// Numeric kind id, taken from `tree_sitter::Node::kind_id`.
pub kind_id: u16,
/// Depth at which the walker visited this node.
pub depth: usize,
/// Visit-order id taken from the walker's node counter.
pub id: usize,
/// Id of the parent node; the key is omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub parent: Option<usize>,
/// Source text of the node, filled in only for identifier-like kinds;
/// the key is omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub name: Option<String>,
}
/// Destination for serialized nodes: either a created output file or standard output.
pub struct ParseOutput {
/// Boxed trait object so the file and stdout cases share a single field type.
writer: Box<dyn Write>,
}
impl ParseOutput {
    /// Opens the output sink.
    ///
    /// With `Some(path)` the file is created with `std::fs::File::create`;
    /// with `None` nodes are written to standard output.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::OutputCreateError`] when the file cannot be created.
    pub fn new(output_path: Option<PathBuf>) -> Result<Self, ParseError> {
        let writer: Box<dyn Write> = match output_path {
            Some(path) => {
                let file = std::fs::File::create(&path).map_err(|source| {
                    ParseError::OutputCreateError {
                        path: path.display().to_string(),
                        source,
                    }
                })?;
                Box::new(file)
            }
            None => Box::new(io::stdout()),
        };
        Ok(Self { writer })
    }

    /// Serializes `node` as one JSON line and flushes the sink.
    ///
    /// The record and its trailing newline are assembled into a single buffer
    /// and handed to the writer in one `write_all` call. Formatting through
    /// `writeln!` would pass the JSON text and the newline to the writer as two
    /// separate writes, which on an unbuffered file means two syscalls per node.
    /// The flush after every record is kept so consumers see nodes as they are
    /// produced.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::SerializationError`] if JSON encoding fails and
    /// [`ParseError::OutputWriteError`] if writing or flushing fails.
    pub fn write_node(&mut self, node: &NodeInfo) -> Result<(), ParseError> {
        let mut line = serde_json::to_string(node)?;
        line.push('\n');
        self.writer
            .write_all(line.as_bytes())
            .and_then(|()| self.writer.flush())
            .map_err(|source| ParseError::OutputWriteError { source })
    }
}
/// Streams `node` and its descendants to `writer` in pre-order.
///
/// The traversal uses an explicit heap-allocated stack instead of recursion,
/// so its depth is limited by available memory rather than by the call stack;
/// deeply nested or long-chained syntax trees no longer risk a stack overflow.
/// Visit order, ids, and emitted records are the same as a recursive
/// pre-order walk.
///
/// # Parameters
///
/// * `node` - root of the subtree to walk.
/// * `code` - source text the tree was parsed from; used to extract names.
/// * `writer` - sink that receives one record per emitted node.
/// * `depth` - depth recorded for `node`; children get `depth + 1`.
/// * `parent_id` - id recorded as the parent of `node`.
/// * `node_counter` - next id to assign. It is incremented for every visited
///   node, including nodes that are not emitted, so emitted ids can have gaps
///   when `all_nodes` is `false`.
/// * `max_depth` - when `Some(max)`, children of nodes at depth `>= max` are
///   not visited (the node itself is still considered for emission).
/// * `all_nodes` - emit every node when `true`; only named nodes otherwise.
///
/// # Errors
///
/// Stops at, and returns, the first error reported by `ParseOutput::write_node`.
pub fn walk_and_stream(
    node: tree_sitter::Node,
    code: &str,
    writer: &mut ParseOutput,
    depth: usize,
    parent_id: Option<usize>,
    node_counter: &mut usize,
    max_depth: Option<usize>,
    all_nodes: bool,
) -> Result<(), ParseError> {
    let source = code.as_bytes();
    // Work list of (node, depth, parent id); the top of the stack is visited next.
    let mut pending = vec![(node, depth, parent_id)];

    while let Some((current, current_depth, current_parent)) = pending.pop() {
        let current_id = *node_counter;
        *node_counter += 1;

        if all_nodes || current.is_named() {
            let kind = current.kind();
            // Only identifier-like nodes carry their source text as `name`.
            let name = if matches!(
                kind,
                "identifier" | "property_identifier" | "type_identifier" | "field_identifier"
            ) {
                current.utf8_text(source).ok().map(String::from)
            } else {
                None
            };
            let start = current.start_position();
            let end = current.end_position();
            let info = NodeInfo {
                node: kind.to_string(),
                start: [start.row, start.column],
                end: [end.row, end.column],
                kind_id: current.kind_id(),
                depth: current_depth,
                id: current_id,
                parent: current_parent,
                name,
            };
            writer.write_node(&info)?;
        }

        if matches!(max_depth, Some(max) if current_depth >= max) {
            continue;
        }

        let mut cursor = current.walk();
        let first_child = pending.len();
        pending.extend(
            current
                .children(&mut cursor)
                .map(|child| (child, current_depth + 1, Some(current_id))),
        );
        // Reverse the freshly pushed children so the leftmost child is popped
        // first, which keeps the traversal in pre-order.
        pending[first_child..].reverse();
    }

    Ok(())
}
/// Parses `file_path` with the tree-sitter grammar selected from its extension
/// and streams the resulting syntax tree as JSON lines.
///
/// # Parameters
///
/// * `file_path` - source file to parse.
/// * `output_path` - file to write to; `None` writes to standard output.
/// * `max_depth` - optional depth limit forwarded to [`walk_and_stream`].
/// * `all_nodes` - forwarded to [`walk_and_stream`]; `false` emits named nodes only.
///
/// # Errors
///
/// * [`ParseError::FileNotFound`] - the path definitely does not exist.
/// * [`ParseError::FileReadError`] - existence could not be determined (for
///   example a permission error) or reading the file failed.
/// * [`ParseError::UnsupportedLanguage`] - the extension maps to no language.
/// * [`ParseError::LanguageSetupError`] / [`ParseError::ParseFailure`] - the
///   parser rejected the grammar or produced no tree.
/// * Any error from creating or writing the output.
pub fn execute_parse(
    file_path: &Path,
    output_path: Option<PathBuf>,
    max_depth: Option<usize>,
    all_nodes: bool,
) -> Result<(), ParseError> {
    use crate::parsing::Language;

    let display_path = || file_path.display().to_string();

    // `Path::exists` folds metadata errors (e.g. permission denied) into
    // `false`, which would be misreported as "File not found". `try_exists`
    // keeps the two cases apart.
    match file_path.try_exists() {
        Ok(true) => {}
        Ok(false) => {
            return Err(ParseError::FileNotFound {
                path: display_path(),
            })
        }
        Err(source) => {
            return Err(ParseError::FileReadError {
                path: display_path(),
                source,
            })
        }
    }

    // A missing or non-UTF-8 extension becomes "", which no language matches.
    let extension = file_path
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or("");
    let language =
        Language::from_extension(extension).ok_or_else(|| ParseError::UnsupportedLanguage {
            extension: extension.to_string(),
        })?;

    let bytes = std::fs::read(file_path).map_err(|source| ParseError::FileReadError {
        path: display_path(),
        source,
    })?;
    // Invalid UTF-8 sequences are replaced rather than rejected.
    let code = String::from_utf8_lossy(&bytes).into_owned();

    let ts_language = match language {
        Language::Rust => tree_sitter_rust::LANGUAGE.into(),
        Language::Python => tree_sitter_python::LANGUAGE.into(),
        Language::TypeScript => tree_sitter_typescript::LANGUAGE_TSX.into(),
        Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
        Language::Php => tree_sitter_php::LANGUAGE_PHP.into(),
        Language::Go => tree_sitter_go::LANGUAGE.into(),
        Language::C => tree_sitter_c::LANGUAGE.into(),
        Language::Clojure => tree_sitter_clojure_orchard::LANGUAGE.into(),
        Language::Cpp => tree_sitter_cpp::LANGUAGE.into(),
        Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
        Language::Gdscript => tree_sitter_gdscript::LANGUAGE.into(),
        Language::Java => tree_sitter_java::LANGUAGE.into(),
        Language::Kotlin => tree_sitter_kotlin::language(),
        Language::Lua => tree_sitter_lua::LANGUAGE.into(),
        Language::Swift => tree_sitter_swift::LANGUAGE.into(),
    };

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&ts_language)
        .map_err(|source| ParseError::LanguageSetupError { source })?;
    let tree = parser.parse(&code, None).ok_or(ParseError::ParseFailure)?;

    // The output is opened only after a successful parse, so a failed parse
    // never creates or truncates the output file.
    let mut output_handler = ParseOutput::new(output_path)?;
    let mut node_counter = 0;
    walk_and_stream(
        tree.root_node(),
        &code,
        &mut output_handler,
        0,
        None,
        &mut node_counter,
        max_depth,
        all_nodes,
    )
}