1use sem_core::model::entity::SemanticEntity;
2
3#[derive(Debug, Clone)]
5pub enum FileRegion {
6 Entity(EntityRegion),
7 Interstitial(InterstitialRegion),
8}
9
10impl FileRegion {
11 pub fn content(&self) -> &str {
12 match self {
13 FileRegion::Entity(e) => &e.content,
14 FileRegion::Interstitial(i) => &i.content,
15 }
16 }
17
18 pub fn key(&self) -> &str {
19 match self {
20 FileRegion::Entity(e) => &e.entity_id,
21 FileRegion::Interstitial(i) => &i.position_key,
22 }
23 }
24
25 pub fn is_entity(&self) -> bool {
26 matches!(self, FileRegion::Entity(_))
27 }
28}
29
/// The slice of a file attributed to one semantic entity.
#[derive(Debug, Clone)]
pub struct EntityRegion {
    /// Identifier of the entity; `FileRegion::key` returns this value.
    pub entity_id: String,
    /// Name of the entity.
    pub entity_name: String,
    /// Type label of the entity, stored as free-form text.
    pub entity_type: String,
    /// Text of the region; `FileRegion::content` returns this value.
    pub content: String,
    /// Start line recorded for the entity. `extract_regions` copies it
    /// unchanged from the source entity, so it does not move when leading
    /// comments are bundled into `content`.
    pub start_line: usize,
    /// End line recorded for the entity, copied unchanged by
    /// `extract_regions` from the source entity.
    pub end_line: usize,
}
39
/// Text that sits outside of every entity in a file.
#[derive(Debug, Clone)]
pub struct InterstitialRegion {
    /// Label describing where the text sits. `extract_regions` produces
    /// `"file_only"`, `"file_header"`, `"between:<prev id>:<next id>"` and
    /// `"file_footer"`.
    pub position_key: String,
    /// Text of the region; `FileRegion::content` returns this value.
    pub content: String,
}
46
47pub fn extract_regions(content: &str, entities: &[SemanticEntity]) -> Vec<FileRegion> {
52 if entities.is_empty() {
53 return vec![FileRegion::Interstitial(InterstitialRegion {
55 position_key: "file_only".to_string(),
56 content: content.to_string(),
57 })];
58 }
59
60 let lines: Vec<&str> = content.lines().collect();
61 let total_lines = lines.len();
62
63 let mut sorted_entities: Vec<&SemanticEntity> = entities.iter().collect();
65 sorted_entities.sort_by_key(|e| e.start_line);
66
67 let mut regions: Vec<FileRegion> = Vec::new();
68 let mut current_line: usize = 0; for (i, entity) in sorted_entities.iter().enumerate() {
71 let entity_start = entity.start_line.saturating_sub(1); let entity_end = entity.end_line; let bundled_start = find_leading_comment_start(&lines, entity_start, current_line);
79
80 if current_line < bundled_start {
82 let interstitial_content = join_lines(&lines[current_line..bundled_start]);
83 let position_key = if i == 0 {
84 "file_header".to_string()
85 } else {
86 format!("between:{}:{}", sorted_entities[i - 1].id, entity.id)
87 };
88 regions.push(FileRegion::Interstitial(InterstitialRegion {
89 position_key,
90 content: interstitial_content,
91 }));
92 }
93
94 let entity_end_clamped = entity_end.min(total_lines);
96 let entity_content = if bundled_start < entity_end_clamped {
97 join_lines(&lines[bundled_start..entity_end_clamped])
98 } else {
99 entity.content.clone()
100 };
101
102 regions.push(FileRegion::Entity(EntityRegion {
103 entity_id: entity.id.clone(),
104 entity_name: entity.name.clone(),
105 entity_type: entity.entity_type.clone(),
106 content: entity_content,
107 start_line: entity.start_line,
108 end_line: entity.end_line,
109 }));
110
111 current_line = entity_end_clamped;
112 }
113
114 if current_line < total_lines {
116 let footer_content = join_lines(&lines[current_line..total_lines]);
117 regions.push(FileRegion::Interstitial(InterstitialRegion {
118 position_key: "file_footer".to_string(),
119 content: footer_content,
120 }));
121 }
122
123 if content.ends_with('\n') {
125 if let Some(last) = regions.last() {
126 if !last.content().ends_with('\n') {
127 match regions.last_mut() {
128 Some(FileRegion::Entity(e)) => e.content.push('\n'),
129 Some(FileRegion::Interstitial(i)) => i.content.push('\n'),
130 None => {}
131 }
132 }
133 }
134 }
135
136 regions
137}
138
/// Returns `true` when a whitespace-trimmed line looks like part of a doc
/// comment: `///`, `//!`, `/**`, a `* ` continuation, or a bare `*` / `*/`.
fn is_doc_comment_line(trimmed: &str) -> bool {
    trimmed.starts_with("///")
        || trimmed.starts_with("//!")
        || trimmed.starts_with("/**")
        || trimmed.starts_with("* ")
        || trimmed == "*"
        || trimmed == "*/"
}

/// Finds the first line of the doc comment attached to an entity.
///
/// Scans upwards from the line just above `entity_start` (a 0-based index
/// into `lines`) and never looks at lines below `min_line`. Returns the index
/// of the topmost line that belongs to the leading comment, or `entity_start`
/// itself when no comment is attached.
///
/// Rules applied while scanning:
/// * a single blank line directly above the entity is skipped;
/// * any other blank line ends the scan;
/// * a line ending in `*/` (and not starting with `/*`) opens a block comment
///   that is followed upwards until a line starting with `/*`;
/// * lines accepted by `is_doc_comment_line` extend the comment;
/// * anything else (including plain `//` comments) ends the scan.
///
/// Lines outside the bounds of `lines` end the scan instead of panicking.
fn find_leading_comment_start(lines: &[&str], entity_start: usize, min_line: usize) -> usize {
    // Nothing above the entity may be claimed (this also covers entity_start == 0).
    if entity_start <= min_line {
        return entity_start;
    }

    let mut comment_start = entity_start;
    let mut in_block_comment = false;

    // A reverse range terminates on its own at `min_line`, which removes the
    // manual index bookkeeping that could previously spin forever on a blank
    // line 0.
    for line_idx in (min_line..entity_start).rev() {
        let trimmed = match lines.get(line_idx) {
            Some(line) => line.trim(),
            None => break,
        };

        if trimmed.is_empty() {
            // Only the blank line immediately above the entity is tolerated.
            if line_idx + 1 == entity_start {
                continue;
            }
            break;
        }

        // Closing line of a multi-line block comment.
        if trimmed.ends_with("*/") && !trimmed.starts_with("/*") {
            in_block_comment = true;
            comment_start = line_idx;
            continue;
        }

        if in_block_comment {
            // Keep walking up until the opener is found.
            if trimmed.starts_with("/*") {
                comment_start = line_idx;
                in_block_comment = false;
            }
            continue;
        }

        if is_doc_comment_line(trimmed) {
            comment_start = line_idx;
            continue;
        }

        break;
    }

    comment_start
}
227
/// Concatenates `lines`, terminating every line with `\n`.
///
/// An empty slice yields an empty string.
fn join_lines(lines: &[&str]) -> String {
    let capacity: usize = lines.iter().map(|line| line.len() + 1).sum();
    let mut joined = String::with_capacity(capacity);
    for line in lines {
        joined.push_str(line);
        joined.push('\n');
    }
    joined
}
236
#[cfg(test)]
mod tests {
    use super::*;
    use sem_core::parser::plugins::create_default_registry;

    /// Looks up the entity region whose entity name equals `name`.
    fn entity_region_named<'a>(regions: &'a [FileRegion], name: &str) -> Option<&'a FileRegion> {
        regions.iter().find(|region| match region {
            FileRegion::Entity(entity) => entity.entity_name == name,
            FileRegion::Interstitial(_) => false,
        })
    }

    /// Both top-level TypeScript functions must surface as entity regions.
    #[test]
    fn test_extract_regions_typescript() {
        let content = r#"import { foo } from 'bar';

export function hello() {
 return "hello";
}

export function world() {
 return "world";
}
"#;

        let registry = create_default_registry();
        let plugin = registry.get_plugin("test.ts").unwrap();
        let entities = plugin.extract_entities(content, "test.ts");

        assert!(!entities.is_empty(), "Should extract entities from TypeScript");

        let regions = extract_regions(content, &entities);

        assert!(regions.len() >= 2, "Should have multiple regions, got {}", regions.len());

        let entity_names: Vec<&str> = regions
            .iter()
            .filter_map(|region| match region {
                FileRegion::Entity(entity) => Some(entity.entity_name.as_str()),
                FileRegion::Interstitial(_) => None,
            })
            .collect();

        assert!(entity_names.contains(&"hello"), "Should find hello function, got {:?}", entity_names);
        assert!(entity_names.contains(&"world"), "Should find world function, got {:?}", entity_names);
    }

    /// A JSDoc block directly above a function belongs to that function's
    /// region and must not be left behind in an interstitial region.
    #[test]
    fn test_comment_bundling_jsdoc() {
        let content = r#"import { foo } from 'bar';

/**
 * Greets a person by name.
 * @param name - The person's name
 */
export function hello(name: string) {
 return `Hello, ${name}!`;
}

export function world() {
 return "world";
}
"#;

        let registry = create_default_registry();
        let plugin = registry.get_plugin("test.ts").unwrap();
        let entities = plugin.extract_entities(content, "test.ts");

        assert!(entities.iter().any(|e| e.name == "hello"), "Should find hello");
        let regions = extract_regions(content, &entities);

        let hello_region = entity_region_named(&regions, "hello").expect("Should find hello region");

        assert!(
            hello_region.content().contains("/**"),
            "hello region should include JSDoc comment. Content: {:?}",
            hello_region.content(),
        );
        assert!(
            hello_region.content().contains("@param name"),
            "hello region should include JSDoc @param. Content: {:?}",
            hello_region.content(),
        );

        for inter in regions.iter().filter(|region| !region.is_entity()) {
            let text = inter.content();
            let holds_jsdoc = text.contains("/**") && text.contains("@param");
            assert!(
                !holds_jsdoc,
                "Interstitial should not contain the bundled JSDoc. Key: {:?}, Content: {:?}",
                inter.key(), text,
            );
        }
    }

    /// `///` doc lines directly above a Rust function are bundled with it.
    #[test]
    fn test_comment_bundling_rust_doc() {
        let content = r#"use std::io;

/// Adds two numbers together.
///
/// # Examples
/// ```
/// assert_eq!(add(1, 2), 3);
/// ```
fn add(a: i32, b: i32) -> i32 {
 a + b
}

fn subtract(a: i32, b: i32) -> i32 {
 a - b
}
"#;

        let registry = create_default_registry();
        let plugin = registry.get_plugin("test.rs").unwrap();
        let entities = plugin.extract_entities(content, "test.rs");

        let regions = extract_regions(content, &entities);
        let add_region = entity_region_named(&regions, "add").expect("Should find add region");

        assert!(
            add_region.content().contains("/// Adds two numbers"),
            "add region should include Rust doc comment. Content: {:?}",
            add_region.content(),
        );
    }

    /// Without entities the whole file comes back as one interstitial region.
    #[test]
    fn test_extract_regions_no_entities() {
        let content = "just some text\nno code here\n";
        let regions = extract_regions(content, &[]);
        assert_eq!(regions.len(), 1);
        assert!(!regions[0].is_entity());
        assert_eq!(regions[0].content(), content);
    }
}