1pub mod languages;
2pub mod multi_language;
3pub mod tree_sitter;
4
5use serde::{Deserialize, Serialize};
6use std::fmt;
7
8#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
10#[serde(rename_all = "snake_case")]
11pub enum ChunkType {
12 Function,
13 Method,
14 Class,
15 Struct,
16 Enum,
17 Interface,
18 Trait,
19 Impl,
20 Module,
21 ModuleLevel,
22 ImportBlock,
23 Namespace,
24 Macro,
25 Section,
26 Document,
27}
28
29impl fmt::Display for ChunkType {
30 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31 match self {
32 ChunkType::Function => write!(f, "function"),
33 ChunkType::Method => write!(f, "method"),
34 ChunkType::Class => write!(f, "class"),
35 ChunkType::Struct => write!(f, "struct"),
36 ChunkType::Enum => write!(f, "enum"),
37 ChunkType::Interface => write!(f, "interface"),
38 ChunkType::Trait => write!(f, "trait"),
39 ChunkType::Impl => write!(f, "impl"),
40 ChunkType::Module => write!(f, "module"),
41 ChunkType::ModuleLevel => write!(f, "module_level"),
42 ChunkType::ImportBlock => write!(f, "import_block"),
43 ChunkType::Namespace => write!(f, "namespace"),
44 ChunkType::Macro => write!(f, "macro"),
45 ChunkType::Section => write!(f, "section"),
46 ChunkType::Document => write!(f, "document"),
47 }
48 }
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct CodeChunk {
54 pub id: String,
56 pub content: String,
58 pub chunk_type: ChunkType,
60 pub start_line: usize,
62 pub end_line: usize,
64 pub file_path: String,
66 pub relative_path: String,
68 pub folder_structure: Vec<String>,
70 pub name: Option<String>,
72 pub parent_name: Option<String>,
74 pub language: String,
76 pub docstring: Option<String>,
78 pub decorators: Vec<String>,
80 pub imports: Vec<String>,
82 pub tags: Vec<String>,
84 pub complexity_score: u32,
86}
87
88impl CodeChunk {
89 pub fn generate_id(file_path: &str, start_line: usize, end_line: usize, name: Option<&str>) -> String {
91 use sha2::{Digest, Sha256};
92 let input = format!("{}:{}:{}:{}", file_path, start_line, end_line, name.unwrap_or(""));
93 let hash = Sha256::digest(input.as_bytes());
94 format!("{:x}", hash)[..16].to_string()
95 }
96
97 pub fn extract_folder_structure(relative_path: &str) -> Vec<String> {
99 let path = std::path::Path::new(relative_path);
100 path.parent()
101 .map(|p| {
102 p.components()
103 .filter_map(|c| match c {
104 std::path::Component::Normal(s) => s.to_str().map(String::from),
105 _ => None,
106 })
107 .collect()
108 })
109 .unwrap_or_default()
110 }
111
112 pub fn compute_complexity(content: &str) -> u32 {
114 let lines = content.lines().count();
115 let branches = content.matches("if ").count()
116 + content.matches("else").count()
117 + content.matches("match ").count()
118 + content.matches("for ").count()
119 + content.matches("while ").count()
120 + content.matches("case ").count();
121 (lines + branches * 2) as u32
122 }
123
124 pub fn extract_tags(content: &str, chunk_type: &ChunkType) -> Vec<String> {
126 let mut tags = Vec::new();
127 let lower = content.to_lowercase();
128
129 if lower.contains("async ") || lower.contains("await ") || lower.contains(".then(") {
131 tags.push("async".to_string());
132 }
133
134 if lower.contains("query") || lower.contains("sql") || lower.contains("database")
136 || lower.contains("insert") || lower.contains("select ") || lower.contains("table")
137 {
138 tags.push("database".to_string());
139 }
140
141 if lower.contains("auth") || lower.contains("login") || lower.contains("token")
143 || lower.contains("password") || lower.contains("session") || lower.contains("permission")
144 {
145 tags.push("auth".to_string());
146 }
147
148 if lower.contains("error") || lower.contains("exception") || lower.contains("try ")
150 || lower.contains("catch") || lower.contains("result<") || lower.contains("anyhow")
151 {
152 tags.push("error_handling".to_string());
153 }
154
155 if lower.contains("endpoint") || lower.contains("route") || lower.contains("request")
157 || lower.contains("response") || lower.contains("http") || lower.contains("api")
158 {
159 tags.push("api".to_string());
160 }
161
162 if lower.contains("#[test]") || lower.contains("#[cfg(test)]")
164 || lower.contains("assert") || lower.contains("mock") || lower.contains("fixture")
165 {
166 tags.push("test".to_string());
167 }
168
169 if lower.contains("export ") || lower.contains("module.exports") || lower.contains("pub ") {
171 tags.push("export".to_string());
172 }
173
174 match chunk_type {
176 ChunkType::Class | ChunkType::Struct => tags.push("type_definition".to_string()),
177 ChunkType::Interface | ChunkType::Trait => tags.push("interface".to_string()),
178 ChunkType::ImportBlock => tags.push("imports".to_string()),
179 _ => {}
180 }
181
182 tags.sort();
183 tags.dedup();
184 tags
185 }
186}