1use crate::core::config::{CHUNK_MAX_LINES, CHUNK_OVERLAP};
2use crate::parse::walker::detect_language;
3use std::path::Path;
4
/// A contiguous window of lines cut out of a single source file.
///
/// The field descriptions below reflect how `chunk_file` populates the value.
#[derive(Debug, Clone)]
pub struct Chunk {
    /// Path of the originating file, converted lossily to a `String`.
    pub path: String,
    /// The chunk's lines joined with `\n`.
    pub content: String,
    /// Name of the first symbol detected inside the chunk, if any.
    pub symbol_name: Option<String>,
    /// Kind of that symbol (`"function"`, `"class"` or `"method"`), if any.
    pub symbol_type: Option<String>,
    /// 1-based number of the first line contained in the chunk.
    pub start_line: usize,
    /// 1-based number of the last line contained in the chunk (inclusive).
    pub end_line: usize,
    /// Language detected for the originating file.
    pub language: String,
    /// Hash of the complete file content the chunk was cut from.
    pub file_hash: String,
}

impl Chunk {
    /// Builds an identifier of the form `path:start_line:end_line`.
    pub fn id(&self) -> String {
        let Self {
            path,
            start_line,
            end_line,
            ..
        } = self;
        format!("{}:{}:{}", path, start_line, end_line)
    }
}
24
25pub fn chunk_file(path: &Path, content: &str) -> Vec<Chunk> {
27 let language = detect_language(path);
28 let file_hash = compute_hash(content);
29 let path_str = path.to_string_lossy().to_string();
30
31 let lines: Vec<&str> = content.lines().collect();
32
33 if lines.is_empty() {
34 return Vec::new();
35 }
36
37 let mut chunks = Vec::new();
38 let mut start = 0;
39
40 while start < lines.len() {
41 let end = (start + CHUNK_MAX_LINES).min(lines.len());
42 let chunk_content = lines[start..end].join("\n");
43
44 let (symbol_name, symbol_type) = extract_symbol(&lines[start..end]);
46
47 chunks.push(Chunk {
48 path: path_str.clone(),
49 content: chunk_content,
50 symbol_name,
51 symbol_type,
52 start_line: start + 1, end_line: end,
54 language: language.clone(),
55 file_hash: file_hash.clone(),
56 });
57
58 if end >= lines.len() {
59 break;
60 }
61
62 start = end.saturating_sub(CHUNK_OVERLAP);
63 }
64
65 chunks
66}
67
68fn compute_hash(content: &str) -> String {
70 let hash = xxhash_rust::xxh3::xxh3_64(content.as_bytes());
71 format!("{:016x}", hash)
72}
73
74fn extract_symbol(lines: &[&str]) -> (Option<String>, Option<String>) {
76 for line in lines {
77 let trimmed = line.trim();
78
79 if let Some(name) = extract_function_name(trimmed) {
81 return (Some(name), Some("function".to_string()));
82 }
83
84 if let Some(name) = extract_class_name(trimmed) {
86 return (Some(name), Some("class".to_string()));
87 }
88
89 if let Some(name) = extract_method_name(trimmed) {
91 return (Some(name), Some("method".to_string()));
92 }
93 }
94
95 (None, None)
96}
97
/// Extracts the name of a function declared at the start of `line`.
///
/// `line` is expected to be trimmed already. Recognised forms:
///
/// * `fn name(` / `fn name<T>(` — generic parameters are not part of the name
/// * `func name(` and Go methods `func (recv) name(` — the receiver is skipped
/// * `function name(`, `export function name(`, `def name(`
/// * `const|let|var|export const name = ...` when the line contains `=>` or
///   `function`; a type annotation (`name: T = ...`) is dropped and only a
///   plain identifier is accepted
///
/// Returns `None` when the line is not such a declaration or when no name can
/// be found (for example an anonymous `function (...)`).
fn extract_function_name(line: &str) -> Option<String> {
    // Trimmed text preceding the first delimiter; `None` when nothing is left.
    fn name_before(rest: &str, delimiters: &[char]) -> Option<String> {
        let name = rest
            .split(|c: char| delimiters.contains(&c))
            .next()?
            .trim();
        if name.is_empty() {
            None
        } else {
            Some(name.to_string())
        }
    }

    if let Some(rest) = line.strip_prefix("fn ") {
        // Stop at `<` as well so `fn map<T>(..)` yields `map`, not `map<T>`.
        return name_before(rest, &['(', '<']);
    }

    if let Some(rest) = line.strip_prefix("func ") {
        let rest = rest.trim_start();
        // Go methods put the receiver first: `func (s *Server) Start(...)`.
        let rest = match rest.strip_prefix('(') {
            Some(after_open) => &after_open[after_open.find(')')? + 1..],
            None => rest,
        };
        // `[` / `<` open generic parameter lists; `{` guards against a
        // function literal that has no name at all.
        return name_before(rest, &['(', '[', '<', '{']);
    }

    let plain = line
        .strip_prefix("function ")
        .or_else(|| line.strip_prefix("export function "))
        .or_else(|| line.strip_prefix("def "));
    if let Some(rest) = plain {
        return name_before(rest, &['(']);
    }

    let binding = line
        .strip_prefix("const ")
        .or_else(|| line.strip_prefix("let "))
        .or_else(|| line.strip_prefix("var "))
        .or_else(|| line.strip_prefix("export const "));
    if let Some(rest) = binding {
        // Only bindings whose value looks like a function are reported.
        if !(line.contains("=>") || line.contains("function")) {
            return None;
        }
        // Cut at `=` or at a type annotation, then reject destructuring
        // patterns and similar non-identifier targets.
        return name_before(rest, &['=', ':']).filter(|name| {
            name.chars()
                .all(|c| c.is_alphanumeric() || c == '_' || c == '$')
        });
    }

    None
}
163
/// Extracts the name of a class-like declaration at the start of `line`.
///
/// `line` is expected to be trimmed already. Recognised forms:
///
/// * `class Name` — the name ends at whitespace, `{`, `(`, `:`, `<` or `;`
/// * `struct Name` / `pub struct Name` — the name ends at whitespace, `{`,
///   `(`, `<` or `;`
/// * `impl Name` / `impl<...> Name` — the generic parameter list after `impl`
///   is skipped as a balanced `<...>` group, so lifetimes, bounds and
///   multiple parameters are handled. For `impl Trait for Type` the first
///   path (`Trait`) is returned.
///
/// Returns `None` for any other line or when no name can be found.
fn extract_class_name(line: &str) -> Option<String> {
    // Trimmed text preceding the first delimiter; `None` when nothing is left.
    fn name_before(rest: &str, delimiters: &[char]) -> Option<String> {
        let name = rest
            .trim_start()
            .split(|c: char| delimiters.contains(&c))
            .next()?
            .trim();
        if name.is_empty() {
            None
        } else {
            Some(name.to_string())
        }
    }

    // Skips a leading balanced `<...>` group. The `>` of a `->` arrow (as in
    // `F: Fn() -> T`) does not close a bracket. Unbalanced input yields "".
    fn skip_generic_params(rest: &str) -> &str {
        if !rest.starts_with('<') {
            return rest;
        }
        let mut depth = 0usize;
        let mut previous = '\0';
        for (index, current) in rest.char_indices() {
            match current {
                '<' => depth += 1,
                '>' if previous != '-' => {
                    // `depth` is at least 1 here because `rest` starts with `<`.
                    depth -= 1;
                    if depth == 0 {
                        return &rest[index + 1..];
                    }
                }
                _ => {}
            }
            previous = current;
        }
        ""
    }

    if let Some(rest) = line.strip_prefix("class ") {
        return name_before(rest, &[' ', '{', '(', ':', '<', ';']);
    }

    let struct_rest = line
        .strip_prefix("pub struct ")
        .or_else(|| line.strip_prefix("struct "));
    if let Some(rest) = struct_rest {
        // `;` terminates unit structs such as `struct Marker;`.
        return name_before(rest, &[' ', '{', '(', '<', ';']);
    }

    if line.starts_with("impl ") || line.starts_with("impl<") {
        let after_keyword = line["impl".len()..].trim_start();
        return name_before(skip_generic_params(after_keyword), &[' ', '{', '<']);
    }

    None
}
201
/// Extracts the name of a method-like declaration found on `line`.
///
/// Recognised forms:
///
/// * any line containing a `pub ` / `pub(...)` visibility and `fn ` — the name
///   is the text after the first `fn ` up to `(` or `<`
/// * lines starting with `async ` — an optional `fn `, `function ` or `def `
///   keyword after `async` is skipped, so `async fn load(` and `async load(`
///   both yield `load`
///
/// Returns `None` for other lines, and when the candidate name is empty or
/// contains whitespace (e.g. `async move {`).
fn extract_method_name(line: &str) -> Option<String> {
    // Trimmed text preceding the first delimiter; rejected when it is empty
    // or is clearly not a single name.
    fn name_before(rest: &str, delimiters: &[char]) -> Option<String> {
        let name = rest
            .split(|c: char| delimiters.contains(&c))
            .next()?
            .trim();
        if name.is_empty() || name.contains(char::is_whitespace) {
            None
        } else {
            Some(name.to_string())
        }
    }

    let line = line.trim();

    let has_visibility = line.contains("pub ") || line.contains("pub(");
    if has_visibility && line.contains("fn ") {
        let keyword_at = line.find("fn ")?;
        // "fn " is ASCII, so slicing three bytes past it is always on a
        // character boundary.
        return name_before(&line[keyword_at + 3..], &['(', '<']);
    }

    if let Some(rest) = line.strip_prefix("async ") {
        let rest = rest.trim_start();
        // Drop the declaration keyword so it does not end up in the name.
        let rest = ["fn ", "function ", "def "]
            .iter()
            .find_map(|keyword| rest.strip_prefix(*keyword))
            .unwrap_or(rest);
        return name_before(rest, &['(', '<']);
    }

    None
}