pub mod languages;
pub mod multi_language;
pub mod tree_sitter;
use serde::{Deserialize, Serialize};
use std::fmt;
/// Kind of construct that a chunk represents.
///
/// Because of `#[serde(rename_all = "snake_case")]`, serde reads and writes each
/// variant by its snake_case name (for example `ModuleLevel` <-> `"module_level"`).
/// The `Display` impl in this file prints the same snake_case labels.
///
/// Variant order is left as-is: index-based serialization formats would be
/// sensitive to reordering.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ChunkType {
Function,
Method,
// `Class` and `Struct` both add the "type_definition" tag in `CodeChunk::extract_tags`.
Class,
Struct,
Enum,
// `Interface` and `Trait` both add the "interface" tag in `CodeChunk::extract_tags`.
Interface,
Trait,
Impl,
Module,
ModuleLevel,
// Adds the "imports" tag in `CodeChunk::extract_tags`.
ImportBlock,
Namespace,
Macro,
Section,
Document,
}
impl fmt::Display for ChunkType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ChunkType::Function => write!(f, "function"),
ChunkType::Method => write!(f, "method"),
ChunkType::Class => write!(f, "class"),
ChunkType::Struct => write!(f, "struct"),
ChunkType::Enum => write!(f, "enum"),
ChunkType::Interface => write!(f, "interface"),
ChunkType::Trait => write!(f, "trait"),
ChunkType::Impl => write!(f, "impl"),
ChunkType::Module => write!(f, "module"),
ChunkType::ModuleLevel => write!(f, "module_level"),
ChunkType::ImportBlock => write!(f, "import_block"),
ChunkType::Namespace => write!(f, "namespace"),
ChunkType::Macro => write!(f, "macro"),
ChunkType::Section => write!(f, "section"),
ChunkType::Document => write!(f, "document"),
}
}
}
/// A piece of source text together with its location and descriptive metadata.
///
/// Derives serde `Serialize`/`Deserialize`; no rename attribute is present, so
/// fields are (de)serialized under the names written here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeChunk {
/// Identifier of the chunk. Presumably the 16-hex-character value returned by
/// `CodeChunk::generate_id` — confirm against the code that builds chunks.
pub id: String,
/// Text of the chunk.
pub content: String,
/// Kind of construct this chunk represents.
pub chunk_type: ChunkType,
/// Line where the chunk starts. NOTE(review): 0- vs 1-based is not visible here.
pub start_line: usize,
/// Line where the chunk ends. NOTE(review): inclusive vs exclusive is not visible here.
pub end_line: usize,
/// Path of the file the chunk came from.
pub file_path: String,
/// Path of the file relative to some root — presumably the project root; confirm with the caller.
pub relative_path: String,
/// Directory names leading to the file. Presumably filled from
/// `CodeChunk::extract_folder_structure(relative_path)` — confirm with the caller.
pub folder_structure: Vec<String>,
/// Name of the chunk, when it has one.
pub name: Option<String>,
/// Name of the enclosing construct, when there is one.
pub parent_name: Option<String>,
/// Language of the chunk, as a free-form string (accepted values are not defined in this file).
pub language: String,
/// Documentation text attached to the chunk, if any.
pub docstring: Option<String>,
/// Decorators attached to the chunk (exact format is not defined in this file).
pub decorators: Vec<String>,
/// Imports associated with the chunk (exact format is not defined in this file).
pub imports: Vec<String>,
/// Tags describing the chunk. Presumably the output of `CodeChunk::extract_tags` — confirm with the caller.
pub tags: Vec<String>,
/// Complexity estimate. Presumably the output of `CodeChunk::compute_complexity` — confirm with the caller.
pub complexity_score: u32,
}
impl CodeChunk {
/// Builds a short, deterministic identifier for a chunk.
///
/// The identifier is the first 16 lowercase hex characters of the SHA-256 digest
/// of `"{file_path}:{start_line}:{end_line}:{name}"`. A missing `name` hashes
/// the same as an empty one.
pub fn generate_id(file_path: &str, start_line: usize, end_line: usize, name: Option<&str>) -> String {
    use sha2::{Digest, Sha256};

    let label = name.unwrap_or("");
    let key = format!("{}:{}:{}:{}", file_path, start_line, end_line, label);

    // A SHA-256 digest renders as 64 ASCII hex characters, so cutting at 16 is
    // always on a character boundary.
    let mut hex = format!("{:x}", Sha256::digest(key.as_bytes()));
    hex.truncate(16);
    hex
}
/// Returns the directory names that lead to `relative_path`, outermost first.
///
/// Only normal path components of the parent directory are kept: root, prefix,
/// `.` and `..` components are skipped, as are components that are not valid
/// UTF-8. A path with no parent yields an empty vector.
pub fn extract_folder_structure(relative_path: &str) -> Vec<String> {
    use std::path::{Component, Path};

    let mut folders = Vec::new();
    if let Some(dir) = Path::new(relative_path).parent() {
        for part in dir.components() {
            if let Component::Normal(os_name) = part {
                if let Some(text) = os_name.to_str() {
                    folders.push(text.to_owned());
                }
            }
        }
    }
    folders
}
/// Computes a rough complexity score for a piece of source text.
///
/// The score is `line count + 2 * branch marker count`. Branch markers are
/// counted as plain, non-overlapping substring matches, so they also match
/// inside identifiers, strings and comments (for example `elif ` contains `if `).
pub fn compute_complexity(content: &str) -> u32 {
    // Substrings treated as evidence of a branch or loop.
    const BRANCH_MARKERS: [&str; 6] = ["if ", "else", "match ", "for ", "while ", "case "];

    let line_count = content.lines().count();
    let branch_count: usize = BRANCH_MARKERS
        .iter()
        .map(|marker| content.matches(*marker).count())
        .sum();

    // `as` truncates if the total ever exceeds `u32::MAX`.
    (line_count + 2 * branch_count) as u32
}
/// Derives descriptive tags for a chunk from its text and its kind.
///
/// Keyword tags are found by case-insensitive substring search: `content` is
/// lowercased once and each rule fires when any of its markers occurs in it.
/// One structural tag may then be added from `chunk_type`. The result is sorted
/// and free of duplicates.
pub fn extract_tags(content: &str, chunk_type: &ChunkType) -> Vec<String> {
    // (tag, markers): the tag applies when at least one marker occurs in the
    // lowercased content. Markers are therefore written in lowercase.
    const KEYWORD_RULES: [(&str, &[&str]); 7] = [
        ("async", &["async ", "await ", ".then("]),
        ("database", &["query", "sql", "database", "insert", "select ", "table"]),
        ("auth", &["auth", "login", "token", "password", "session", "permission"]),
        ("error_handling", &["error", "exception", "try ", "catch", "result<", "anyhow"]),
        ("api", &["endpoint", "route", "request", "response", "http", "api"]),
        ("test", &["#[test]", "#[cfg(test)]", "assert", "mock", "fixture"]),
        ("export", &["export ", "module.exports", "pub "]),
    ];

    let haystack = content.to_lowercase();
    let mut tags: Vec<String> = KEYWORD_RULES
        .iter()
        .filter(|(_, markers)| markers.iter().any(|marker| haystack.contains(*marker)))
        .map(|(tag, _)| (*tag).to_owned())
        .collect();

    // At most one tag comes from the chunk kind itself.
    let structural = match chunk_type {
        ChunkType::Class | ChunkType::Struct => Some("type_definition"),
        ChunkType::Interface | ChunkType::Trait => Some("interface"),
        ChunkType::ImportBlock => Some("imports"),
        _ => None,
    };
    tags.extend(structural.map(String::from));

    tags.sort();
    tags.dedup();
    tags
}
}