1pub mod cache;
2pub mod language;
3
4use std::path::Path;
5
6use tree_sitter::{Query, QueryCursor, StreamingIterator};
7
8use cache::ASTCache;
9use language::{Lang, LanguageRegistry};
10
/// A named definition located in a source file.
///
/// Values are produced both by the tree-sitter based listers and by the
/// line-oriented fallbacks in this module.
#[derive(Debug, Clone)]
pub struct Symbol {
    /// Name of the symbol as extracted from the source text (markup listers
    /// wrap tag names as `<tag>`).
    pub name: String,
    /// Line on which the definition starts; the listers in this file fill it
    /// with a 1-based line number.
    pub start_line: usize,
    /// Line on which the definition ends (same numbering as `start_line`).
    pub end_line: usize,
    /// Byte offset of the start of the definition within the file contents.
    pub start_byte: usize,
    /// Byte offset of the end of the definition within the file contents.
    pub end_byte: usize,
    /// Free-form kind label, e.g. a tree-sitter node kind or a fallback label
    /// such as `"css_rule"` or `"heading"`.
    pub kind: String,
}
27
28impl Symbol {
29 pub fn is_chinese(&self) -> bool {
31 contains_chinese(&self.name)
32 }
33
34 pub fn is_pinyin(&self) -> bool {
36 is_pinyin_identifier(&self.name)
37 }
38
39 pub fn is_chinese_related(&self) -> bool {
41 self.is_chinese() || self.is_pinyin()
42 }
43}
44
/// Returns `true` when `c` falls in one of the CJK ideograph blocks this
/// module recognises.
fn is_chinese(c: char) -> bool {
    // Inclusive code point ranges, in the order they are checked.
    const IDEOGRAPH_BLOCKS: [(char, char); 5] = [
        ('\u{4E00}', '\u{9FFF}'),   // CJK Unified Ideographs
        ('\u{3400}', '\u{4DBF}'),   // CJK Unified Ideographs Extension A
        ('\u{20000}', '\u{2A6DF}'), // CJK Unified Ideographs Extension B
        ('\u{F900}', '\u{FAFF}'),   // CJK Compatibility Ideographs
        ('\u{2F800}', '\u{2FA1F}'), // CJK Compatibility Ideographs Supplement
    ];

    IDEOGRAPH_BLOCKS
        .iter()
        .any(|&(first, last)| first <= c && c <= last)
}
55
56fn contains_chinese(s: &str) -> bool {
58 s.chars().any(is_chinese)
59}
60
/// Heuristically decides whether `s` is an identifier spelled in pinyin.
///
/// The identifier is lower-cased and greedily split, from the left, into the
/// longest known syllables. It counts as pinyin when at least two syllables
/// were matched and they cover more than 80% of the identifier's bytes.
///
/// Returns `false` for the empty string and for anything that does not start
/// with an ASCII letter. Non-ASCII input never panics: candidate windows that
/// would end inside a multi-byte character are simply skipped.
fn is_pinyin_identifier(s: &str) -> bool {
    // Must stay sorted: lookups use `binary_search`.
    const PINYIN_SYLLABLES: &[&str] = &[
        "ba", "bai", "bei", "biao", "chang", "chu", "da", "dan", "di", "ding",
        "dong", "duan", "duo", "er", "fen", "gao", "guo", "hao", "hou", "hu",
        "huai", "ji", "jian", "jiu", "kuai", "kuan", "leng", "li", "lie", "lu",
        "man", "miao", "ming", "mu", "nan", "nei", "nian", "qi", "qian", "re",
        "ren", "ri", "san", "shang", "shao", "shen", "shi", "shu", "si", "tian",
        "wai", "wan", "wen", "wu", "xi", "xia", "xiao", "xin", "xing", "yi",
        "yong", "you", "yue", "zhai", "zhong", "zuo",
    ];
    // Longest candidate window tried at each position, in bytes.
    const MAX_SYLLABLE_LEN: usize = 5;

    let starts_with_letter = s
        .chars()
        .next()
        .map_or(false, |c| c.is_ascii_alphabetic());
    if !starts_with_letter {
        return false;
    }

    let lower = s.to_lowercase();

    // `pos` doubles as the number of bytes consumed so far. It only ever
    // advances by the length of an ASCII syllable, so it always sits on a
    // character boundary.
    let mut pos = 0usize;
    let mut syllable_count = 0usize;

    while pos < lower.len() {
        let longest = MAX_SYLLABLE_LEN.min(lower.len() - pos);
        // Prefer the longest match; `get` returns `None` (instead of
        // panicking) when the window would split a multi-byte character.
        let matched_len = (1..=longest).rev().find(|&len| {
            lower
                .get(pos..pos + len)
                .map_or(false, |candidate| {
                    PINYIN_SYLLABLES.binary_search(&candidate).is_ok()
                })
        });

        match matched_len {
            Some(len) => {
                pos += len;
                syllable_count += 1;
            }
            None => break,
        }
    }

    syllable_count >= 2 && pos as f64 / lower.len() as f64 > 0.8
}
119
/// Entry point for symbol listing, symbol extraction, file skeletons and
/// syntax-error counting.
///
/// Languages recognised by `LanguageRegistry` are parsed through the owned
/// [`ASTCache`]; other files go through line-based fallbacks.
pub struct SemanticSearcher {
    /// Parser front-end used by every tree-sitter based operation.
    cache: ASTCache,
}
124
125impl SemanticSearcher {
126 pub fn new() -> Self {
127 Self {
128 cache: ASTCache::new(),
129 }
130 }
131
132 pub fn list_symbols(&mut self, path: &Path) -> Option<Vec<Symbol>> {
135 let source = std::fs::read_to_string(path).ok()?;
136
137 let lang = LanguageRegistry::detect(path);
138
139 if let Some(lang) = lang {
140 let mut symbols = self.list_symbols_treesitter(path, &source, lang)?;
141
142 if lang.is_vue() {
144 if let Some(html_symbols) = self.list_vue_template_symbols(&source) {
145 symbols.extend(html_symbols);
146 }
147 }
148
149 Some(symbols)
150 } else {
151 Some(self.list_symbols_indent(&source, path))
152 }
153 }
154
155 pub fn extract_symbol(&mut self, path: &Path, symbol_name: &str) -> Option<SymbolSlice> {
158 let source = std::fs::read_to_string(path).ok()?;
159 let lang = LanguageRegistry::detect(path)?;
160 let symbols = self.list_symbols_treesitter(path, &source, lang)?;
161
162 let sym = symbols.iter().find(|s| s.name == symbol_name)?;
164 let text = source[sym.start_byte..sym.end_byte].to_string();
165
166 Some(SymbolSlice {
167 name: sym.name.clone(),
168 kind: sym.kind.clone(),
169 start_line: sym.start_line,
170 end_line: sym.end_line,
171 start_byte: sym.start_byte,
172 end_byte: sym.end_byte,
173 text,
174 })
175 }
176
177 pub fn skeleton(&mut self, path: &Path) -> Option<String> {
179 let source = std::fs::read_to_string(path).ok()?;
180 let lang = LanguageRegistry::detect(path);
181
182 if let Some(lang) = lang {
183 self.skeleton_treesitter(path, &source, lang)
184 } else {
185 Some(self.skeleton_indent(&source, path))
186 }
187 }
188
    /// Forwards an invalidation request for `path` to the underlying cache.
    // NOTE(review): presumably this drops any cached parse for the file so the
    // next operation re-parses it; confirm against `ASTCache::invalidate`.
    pub fn invalidate(&mut self, path: &Path) {
        self.cache.invalidate(path);
    }
193
194 pub fn count_syntax_errors(&mut self, source: &str, path: &Path) -> (usize, Vec<usize>) {
197 let lang = match language::LanguageRegistry::detect(path) {
198 Some(l) => l,
199 None => return (0, vec![]),
200 };
201 let tree = match self.cache.parse_source(source, lang) {
202 Some(t) => t,
203 None => return (0, vec![]),
204 };
205
206 let mut errors = Vec::new();
207 Self::collect_errors(tree.root_node(), &mut errors);
208 let count = errors.len();
209 errors.truncate(5); (count, errors)
211 }
212
213 fn collect_errors(node: tree_sitter::Node, errors: &mut Vec<usize>) {
214 if node.is_error() || node.is_missing() {
215 errors.push(node.start_position().row + 1);
216 }
217 let mut cursor = node.walk();
218 if cursor.goto_first_child() {
219 loop {
220 Self::collect_errors(cursor.node(), errors);
221 if !cursor.goto_next_sibling() {
222 break;
223 }
224 }
225 }
226 }
227
228 pub fn find_similar_calls(&mut self, path: &Path, pattern: &str) -> Option<String> {
237 let source = std::fs::read_to_string(path).ok()?;
238 let lang = LanguageRegistry::detect(path)?;
239 let tree = self.cache.parse_source(&source, lang)?;
240
241 let pattern_lower = pattern.to_lowercase();
242 let mut results: Vec<(usize, String, String)> = Vec::new(); Self::walk_matching_calls(tree.root_node(), &source, &pattern_lower, &mut results, "");
245
246 if results.is_empty() {
247 return None;
248 }
249
250 let short_name = path
251 .file_name()
252 .map(|n| n.to_string_lossy().to_string())
253 .unwrap_or_else(|| path.to_string_lossy().to_string());
254
255 let mut out = format!(
256 "{} calls matching '{}' in {}:\n",
257 results.len(),
258 pattern,
259 short_name
260 );
261 for (line, call_text, func) in &results {
262 if func.is_empty() {
263 out.push_str(&format!(" L{}: {}\n", line, call_text));
264 } else {
265 out.push_str(&format!(" L{}: {} (in {})\n", line, call_text, func));
266 }
267 }
268 Some(out)
269 }
270
271 fn walk_matching_calls(
273 node: tree_sitter::Node,
274 source: &str,
275 pattern: &str,
276 results: &mut Vec<(usize, String, String)>,
277 enclosing_fn: &str,
278 ) {
279 let mut current_fn = enclosing_fn.to_string();
281 let kind = node.kind();
282 if kind.contains("function") || kind.contains("method") || kind == "constructor_declaration"
283 {
284 if let Some(name_node) = node.child_by_field_name("name") {
285 current_fn = source[name_node.start_byte()..name_node.end_byte()].to_string();
286 }
287 }
288
289 if kind == "method_invocation" || kind == "call_expression" {
291 let call_text = &source[node.start_byte()..node.end_byte()];
292 let short = if call_text.len() > 80 {
294 let mut end = 77;
295 while !call_text.is_char_boundary(end) {
296 end -= 1;
297 }
298 format!("{}...", &call_text[..end])
299 } else {
300 call_text.to_string()
301 };
302 let oneline = short.replace('\n', " ").replace(" ", " ");
304
305 if call_text.to_lowercase().contains(pattern) {
306 let line = node.start_position().row + 1;
307 results.push((line, oneline, current_fn.clone()));
308 }
309 }
310
311 let mut cursor = node.walk();
313 if cursor.goto_first_child() {
314 loop {
315 Self::walk_matching_calls(cursor.node(), source, pattern, results, ¤t_fn);
316 if !cursor.goto_next_sibling() {
317 break;
318 }
319 }
320 }
321 }
322
323 fn list_vue_template_symbols(&mut self, source: &str) -> Option<Vec<Symbol>> {
326 let template_start = source.find("<template")?;
328 let template_end = source.rfind("</template>")?;
329 if template_start >= template_end {
330 return None;
331 }
332
333 let template_content_start = source[template_start..].find('>')? + template_start + 1;
335 let template_content = &source[template_content_start..template_end];
336
337 let line_offset = source[..template_content_start].lines().count();
339
340 let html_grammar = Lang::html_grammar();
342 let mut parser = tree_sitter::Parser::new();
343 parser.set_language(&html_grammar).ok()?;
344 let tree = parser.parse(template_content, None)?;
345
346 let query_str = Lang::Html.symbols_query();
347 let query = tree_sitter::Query::new(&html_grammar, query_str).ok()?;
348 let mut cursor = tree_sitter::QueryCursor::new();
349 let mut matches = cursor.matches(&query, tree.root_node(), template_content.as_bytes());
350
351 let name_idx = query.capture_index_for_name("name")?;
352 let def_idx = query.capture_index_for_name("definition")?;
353
354 let mut symbols = Vec::new();
355 let mut seen_lines = std::collections::HashSet::new();
356
357 while let Some(m) = matches.next() {
358 let name_cap = match m.captures.iter().find(|c| c.index == name_idx) {
359 Some(c) => c,
360 None => continue,
361 };
362 let def_cap = match m.captures.iter().find(|c| c.index == def_idx) {
363 Some(c) => c,
364 None => continue,
365 };
366 let name_node = name_cap.node;
367 let def_node = def_cap.node;
368
369 let tag_name = &template_content[name_node.start_byte()..name_node.end_byte()];
370 let start_line = def_node.start_position().row + line_offset;
371
372 if matches!(
374 tag_name,
375 "div"
376 | "span"
377 | "p"
378 | "a"
379 | "li"
380 | "ul"
381 | "ol"
382 | "br"
383 | "hr"
384 | "img"
385 | "i"
386 | "b"
387 | "strong"
388 | "em"
389 | "small"
390 | "label"
391 | "input"
392 | "option"
393 | "thead"
394 | "tbody"
395 | "tr"
396 | "td"
397 | "th"
398 ) {
399 let line = template_content
401 .lines()
402 .nth(def_node.start_position().row)
403 .unwrap_or("");
404 let has_vue_attr = line.contains("v-if")
405 || line.contains("v-for")
406 || line.contains("v-show")
407 || line.contains("@click")
408 || line.contains("v-model");
409 if !has_vue_attr {
410 continue;
411 }
412 }
413
414 if !seen_lines.insert(start_line) {
416 continue;
417 }
418
419 let end_line = def_node.end_position().row + line_offset;
420 symbols.push(Symbol {
421 name: format!("<{}>", tag_name),
422 start_line,
423 end_line,
424 start_byte: def_node.start_byte() + template_content_start,
425 end_byte: def_node.end_byte() + template_content_start,
426 kind: "element".to_string(),
427 });
428
429 if symbols.len() >= 20 {
430 break;
431 } }
433
434 if symbols.is_empty() {
435 None
436 } else {
437 Some(symbols)
438 }
439 }
440
441 fn list_symbols_treesitter(
444 &mut self,
445 path: &Path,
446 source: &str,
447 lang: Lang,
448 ) -> Option<Vec<Symbol>> {
449 if lang == Lang::Vue {
451 return self.list_symbols_vue(path, source);
452 }
453
454 let tree = self.cache.parse_source(source, lang)?;
455 let query_src = lang.symbols_query();
456 let grammar = lang.grammar();
457 let query = Query::new(&grammar, query_src).ok()?;
458
459 let def_idx = query.capture_index_for_name("definition")?;
460 let name_idx = query.capture_index_for_name("name")?;
461
462 let mut cursor = QueryCursor::new();
463
464 let mut symbols = Vec::new();
465 let mut seen_ranges: std::collections::HashSet<(usize, usize)> =
466 std::collections::HashSet::new();
467
468 let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
469 loop {
470 matches.advance();
471 let m = match matches.get() {
472 Some(m) => m,
473 None => break,
474 };
475
476 let mut sym_name = None;
477 let mut def_start = 0usize;
478 let mut def_end = 0usize;
479 let mut def_start_row = 0usize;
480 let mut def_end_row = 0usize;
481 let mut def_kind = "";
482 let mut has_def = false;
483
484 for capture in m.captures {
485 if capture.index == name_idx {
486 sym_name = Some(
487 source[capture.node.start_byte()..capture.node.end_byte()].to_string(),
488 );
489 }
490 if capture.index == def_idx {
491 def_start = capture.node.start_byte();
492 def_end = capture.node.end_byte();
493 def_start_row = capture.node.start_position().row;
494 def_end_row = capture.node.end_position().row;
495 def_kind = capture.node.kind();
496 has_def = true;
497 }
498 }
499
500 if let (Some(name), true) = (sym_name, has_def) {
501 let range = (def_start, def_end);
502 if seen_ranges.contains(&range) {
503 continue;
504 }
505 seen_ranges.insert(range);
506
507 symbols.push(Symbol {
508 name,
509 start_line: def_start_row + 1,
510 end_line: def_end_row + 1,
511 start_byte: def_start,
512 end_byte: def_end,
513 kind: def_kind.to_string(),
514 });
515 }
516 }
517
518 Some(symbols)
519 }
520
521 fn skeleton_treesitter(&mut self, path: &Path, source: &str, lang: Lang) -> Option<String> {
522 let symbols = self.list_symbols_treesitter(path, source, lang)?;
523 let lines: Vec<&str> = source.lines().collect();
524 let mut out = String::new();
525
526 for (i, line) in lines.iter().enumerate() {
528 let trimmed = line.trim();
529 if trimmed.starts_with("use ")
530 || trimmed.starts_with("import ")
531 || trimmed.starts_with("from ")
532 || trimmed.starts_with("#include")
533 || trimmed.starts_with("package ")
534 || trimmed.starts_with("require")
535 {
536 out.push_str(&format!("{:4}| {}\n", i + 1, line));
537 }
538 }
539
540 if !out.is_empty() {
541 out.push('\n');
542 }
543
544 for sym in &symbols {
545 let sig_line = if sym.start_line <= lines.len() {
547 lines[sym.start_line - 1]
548 } else {
549 &sym.name
550 };
551
552 let line_range = format!("L{}-{}", sym.start_line, sym.end_line);
553 let body_lines = sym.end_line - sym.start_line + 1;
554
555 out.push_str(&format!(
556 "{:4}| {} {{ ... }} // {} ({} lines)\n",
557 sym.start_line,
558 sig_line.trim_end(),
559 line_range,
560 body_lines
561 ));
562 }
563
564 Some(out)
565 }
566
567 fn extract_script_section(source: &str) -> Option<(String, usize, usize)> {
571 let script_start = source.find("<script")?;
573 let tag_end = source[script_start..].find('>')? + script_start + 1;
574 let script_end = source[tag_end..].find("</script>")? + tag_end;
576 let script_content = &source[tag_end..script_end];
577
578 let line_offset = source[..tag_end].lines().count();
580 let byte_offset = tag_end;
581
582 Some((script_content.to_string(), line_offset, byte_offset))
583 }
584
585 fn list_symbols_vue(&mut self, _path: &Path, source: &str) -> Option<Vec<Symbol>> {
586 let (script, line_offset, byte_offset) = Self::extract_script_section(source)?;
587 let tree = self.cache.parse_source(&script, Lang::Vue)?;
588 let query_src = Lang::Vue.symbols_query();
589 let grammar = Lang::Vue.grammar();
590 let query = Query::new(&grammar, query_src).ok()?;
591
592 let def_idx = query.capture_index_for_name("definition")?;
593 let name_idx = query.capture_index_for_name("name")?;
594
595 let mut cursor = QueryCursor::new();
596 let mut symbols = Vec::new();
597 let mut seen_ranges: std::collections::HashSet<(usize, usize)> =
598 std::collections::HashSet::new();
599
600 let mut matches = cursor.matches(&query, tree.root_node(), script.as_bytes());
601 loop {
602 matches.advance();
603 let m = match matches.get() {
604 Some(m) => m,
605 None => break,
606 };
607
608 let mut sym_name = None;
609 let mut def_start = 0usize;
610 let mut def_end = 0usize;
611 let mut def_start_row = 0usize;
612 let mut def_end_row = 0usize;
613 let mut def_kind = "";
614 let mut has_def = false;
615
616 for capture in m.captures {
617 if capture.index == name_idx {
618 sym_name = Some(
619 script[capture.node.start_byte()..capture.node.end_byte()].to_string(),
620 );
621 }
622 if capture.index == def_idx {
623 def_start = capture.node.start_byte();
624 def_end = capture.node.end_byte();
625 def_start_row = capture.node.start_position().row;
626 def_end_row = capture.node.end_position().row;
627 def_kind = capture.node.kind();
628 has_def = true;
629 }
630 }
631
632 if let (Some(name), true) = (sym_name, has_def) {
633 let range = (def_start, def_end);
634 if seen_ranges.contains(&range) {
635 continue;
636 }
637 seen_ranges.insert(range);
638
639 symbols.push(Symbol {
640 name,
641 start_line: def_start_row + line_offset,
643 end_line: def_end_row + line_offset,
644 start_byte: def_start + byte_offset,
645 end_byte: def_end + byte_offset,
646 kind: def_kind.to_string(),
647 });
648 }
649 }
650
651 let lines: Vec<&str> = source.lines().collect();
655 for (i, line) in lines.iter().enumerate() {
656 let trimmed = line.trim();
657 if trimmed.starts_with("<template")
658 || trimmed.starts_with("<script")
659 || trimmed.starts_with("<style")
660 {
661 let tag = if trimmed.starts_with("<template") {
662 "template"
663 } else if trimmed.starts_with("<script") {
664 "script"
665 } else {
666 "style"
667 };
668 let close_tag = format!("</{}>", tag);
669 let end_line = lines[i..]
670 .iter()
671 .position(|l| l.trim().starts_with(&close_tag))
672 .map(|p| i + p + 1)
673 .unwrap_or(lines.len());
674 let start_byte = lines[..i].iter().map(|l| l.len() + 1).sum::<usize>();
675 let end_byte = lines[..end_line].iter().map(|l| l.len() + 1).sum::<usize>();
676 symbols.push(Symbol {
677 name: format!("<{}>", tag),
678 start_line: i + 1,
679 end_line,
680 start_byte,
681 end_byte,
682 kind: "sfc_section".to_string(),
683 });
684 }
685 }
686
687 symbols.sort_by_key(|s| s.start_line);
688 Some(symbols)
689 }
690
691 fn list_symbols_indent(&self, source: &str, path: &Path) -> Vec<Symbol> {
697 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
698 let lines: Vec<&str> = source.lines().collect();
699
700 match ext {
701 "css" | "scss" | "less" | "sass" => self.list_symbols_css(&lines),
702 "html" | "htm" => self.list_symbols_html(&lines),
703 "json" => self.list_symbols_json(&lines),
704 "yaml" | "yml" | "toml" => self.list_symbols_yaml(&lines),
705 "md" | "mdx" => self.list_symbols_markdown(&lines),
706 _ => self.list_symbols_code_indent(&lines),
707 }
708 }
709
710 fn list_symbols_css(&self, lines: &[&str]) -> Vec<Symbol> {
712 let mut symbols = Vec::new();
713 for (i, line) in lines.iter().enumerate() {
714 let trimmed = line.trim();
715 if trimmed.is_empty() {
716 continue;
717 }
718 let indent = line.len() - line.trim_start().len();
719 let is_match = trimmed.starts_with(":root")
720 || trimmed.starts_with("@keyframes")
721 || trimmed.starts_with("@media")
722 || trimmed.starts_with("@layer")
723 || trimmed.starts_with("@import")
724 || trimmed.starts_with("@font-face")
725 || trimmed.starts_with("/* ===")
726 || trimmed.starts_with("/* ---")
727 || trimmed.starts_with("/* ***")
728 || (indent == 0 && trimmed.starts_with('.') && trimmed.contains('{'))
729 || (indent == 0 && trimmed.starts_with('#') && trimmed.contains('{'));
730
731 if is_match {
732 let end = find_block_end(lines, i);
734 let name = trimmed
735 .split('{')
736 .next()
737 .unwrap_or(trimmed)
738 .trim()
739 .to_string();
740 symbols.push(make_symbol(name, "css_rule", i, end, lines));
741 }
742 }
743 symbols
744 }
745
746 fn list_symbols_html(&self, lines: &[&str]) -> Vec<Symbol> {
748 let mut symbols = Vec::new();
749 let tags = [
750 "<head",
751 "<body",
752 "<header",
753 "<main",
754 "<footer",
755 "<nav",
756 "<section",
757 "<article",
758 "<!DOCTYPE",
759 ];
760 for (i, line) in lines.iter().enumerate() {
761 let trimmed = line.trim();
762 if tags.iter().any(|t| trimmed.starts_with(t)) {
763 let name = trimmed
764 .split(|c: char| c == '>' || c == ' ')
765 .next()
766 .unwrap_or(trimmed)
767 .to_string();
768 symbols.push(make_symbol(name, "html_tag", i, i + 1, lines));
769 }
770 }
771 symbols
772 }
773
774 fn list_symbols_json(&self, lines: &[&str]) -> Vec<Symbol> {
776 let mut symbols = Vec::new();
777 for (i, line) in lines.iter().enumerate() {
778 let trimmed = line.trim();
779 let indent = line.len() - line.trim_start().len();
780 if indent <= 2 && trimmed.starts_with('"') && trimmed.contains(':') {
782 let name = trimmed
783 .split(':')
784 .next()
785 .unwrap_or(trimmed)
786 .trim_matches('"')
787 .trim()
788 .to_string();
789 symbols.push(make_symbol(name, "json_key", i, i + 1, lines));
790 }
791 }
792 symbols
793 }
794
795 fn list_symbols_yaml(&self, lines: &[&str]) -> Vec<Symbol> {
797 let mut symbols = Vec::new();
798 for (i, line) in lines.iter().enumerate() {
799 let trimmed = line.trim();
800 let indent = line.len() - line.trim_start().len();
801 if indent == 0
802 && !trimmed.is_empty()
803 && !trimmed.starts_with('#')
804 && !trimmed.starts_with("---")
805 {
806 let name = trimmed
807 .split(':')
808 .next()
809 .unwrap_or(trimmed)
810 .trim()
811 .to_string();
812 if !name.is_empty() {
813 symbols.push(make_symbol(name, "yaml_key", i, i + 1, lines));
814 }
815 }
816 }
817 symbols
818 }
819
820 fn list_symbols_markdown(&self, lines: &[&str]) -> Vec<Symbol> {
822 let mut symbols = Vec::new();
823 for (i, line) in lines.iter().enumerate() {
824 let trimmed = line.trim();
825 if trimmed.starts_with('#') {
826 let name = trimmed.trim_start_matches('#').trim().to_string();
827 let end = lines[i + 1..]
829 .iter()
830 .position(|l| l.trim().starts_with('#'))
831 .map(|p| i + 1 + p)
832 .unwrap_or(lines.len());
833 symbols.push(make_symbol(name, "heading", i, end, lines));
834 }
835 }
836 symbols
837 }
838
839 fn list_symbols_code_indent(&self, lines: &[&str]) -> Vec<Symbol> {
841 let mut symbols = Vec::new();
842
843 for (i, line) in lines.iter().enumerate() {
847 let trimmed = line.trim();
848 if trimmed.is_empty() {
849 continue;
850 }
851 let indent = line.len() - line.trim_start().len();
852 if indent <= 8 && contains_chinese(trimmed) {
853 if let Some(eq_pos) = trimmed.find('=') {
854 let var_name = trimmed[..eq_pos].trim();
855 if contains_chinese(var_name) && !var_name.contains(' ') {
856 symbols.push(make_symbol(
857 var_name.to_string(),
858 "chinese_variable",
859 i,
860 i + 1,
861 lines,
862 ));
863 }
864 }
865 }
866 }
867
868 let mut i = 0;
870 while i < lines.len() {
871 let line = lines[i];
872 let trimmed = line.trim();
873
874 if trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with('#') {
875 i += 1;
876 continue;
877 }
878
879 let indent = line.len() - line.trim_start().len();
880 if indent == 0 && !trimmed.starts_with('}') && !trimmed.starts_with(')') {
881 let is_def = trimmed.starts_with("fn ")
882 || trimmed.starts_with("pub ")
883 || trimmed.starts_with("def ")
884 || trimmed.starts_with("class ")
885 || trimmed.starts_with("function ")
886 || trimmed.starts_with("func ")
887 || trimmed.starts_with("type ")
888 || trimmed.starts_with("struct ")
889 || trimmed.starts_with("enum ")
890 || trimmed.starts_with("interface ")
891 || trimmed.starts_with("impl ")
892 || trimmed.starts_with("trait ")
893 || trimmed.starts_with("const ")
894 || trimmed.starts_with("export ")
895 || trimmed.starts_with("async ")
896 || trimmed.starts_with("public ")
897 || trimmed.starts_with("private ")
898 || trimmed.starts_with("protected ");
899
900 if is_def {
901 let start = i;
902 let mut end = i + 1;
903 while end < lines.len() {
904 let next = lines[end];
905 let next_trimmed = next.trim();
906 if next_trimmed.is_empty() {
907 end += 1;
908 continue;
909 }
910 let next_indent = next.len() - next.trim_start().len();
911 if next_indent == 0 && !next_trimmed.starts_with('}') {
912 break;
913 }
914 end += 1;
915 }
916 if end < lines.len() && lines[end].trim() == "}" {
917 end += 1;
918 }
919
920 let name = extract_indent_name(trimmed);
921 symbols.push(make_symbol(name, "indent_block", start, end, lines));
922
923 i = end;
924 continue;
925 }
926 }
927
928 i += 1;
929 }
930
931 symbols
932 }
933
934 fn skeleton_indent(&self, source: &str, path: &Path) -> String {
935 let symbols = self.list_symbols_indent(source, path);
936 let lines: Vec<&str> = source.lines().collect();
937 let mut out = String::new();
938
939 for sym in &symbols {
940 if sym.start_line <= lines.len() {
941 let sig = lines[sym.start_line - 1];
942 let body_lines = sym.end_line - sym.start_line + 1;
943 out.push_str(&format!(
944 "{:4}| {} // L{}-{} ({} lines)\n",
945 sym.start_line,
946 sig.trim_end(),
947 sym.start_line,
948 sym.end_line,
949 body_lines
950 ));
951 }
952 }
953
954 out
955 }
956}
957
/// A symbol together with the source text it covers, as returned by
/// `SemanticSearcher::extract_symbol`.
#[derive(Debug, Clone)]
pub struct SymbolSlice {
    /// Name of the extracted symbol.
    pub name: String,
    /// Kind label copied from the originating `Symbol`.
    pub kind: String,
    /// Start line copied from the originating `Symbol`.
    pub start_line: usize,
    /// End line copied from the originating `Symbol`.
    pub end_line: usize,
    /// Byte offset in the file at which `text` starts.
    pub start_byte: usize,
    /// Byte offset in the file at which `text` ends.
    pub end_byte: usize,
    /// The file contents between `start_byte` and `end_byte`.
    pub text: String,
}
969
970fn make_symbol(name: String, kind: &str, start: usize, end: usize, lines: &[&str]) -> Symbol {
972 let start_byte = lines[..start].iter().map(|l| l.len() + 1).sum::<usize>();
973 let end_byte = lines[..end].iter().map(|l| l.len() + 1).sum::<usize>();
974 Symbol {
975 name,
976 start_line: start + 1,
977 end_line: end,
978 start_byte,
979 end_byte,
980 kind: kind.to_string(),
981 }
982}
983
/// Finds where the brace-delimited block that begins at `lines[start]` ends.
///
/// Returns the 0-based index one past the last line of the block (equal to
/// the 1-based number of that last line).
///
/// * A block that opens and closes on the start line ends on that line.
/// * The opening brace may be on the start line or on the line after it.
/// * A start line without a brace that ends in `;` or `*/` (an `@import`, a
///   one-line banner comment) is a single-line item, as is any start line
///   that is not followed by an opening brace on the next line.
/// * If the block is never closed, or `start` is out of range, the result is
///   `start + 1` capped at `lines.len()`.
///
/// Braces are counted textually; strings and comments are not interpreted.
fn find_block_end(lines: &[&str], start: usize) -> usize {
    let mut depth = 0i32;
    let mut opened = false;

    for (i, line) in lines.iter().enumerate().skip(start) {
        for ch in line.chars() {
            match ch {
                '{' => {
                    depth += 1;
                    opened = true;
                }
                '}' => depth -= 1,
                _ => {}
            }
        }

        if opened {
            // Balanced again: this line closes the block, even the first one.
            if depth <= 0 {
                return i + 1;
            }
            continue;
        }

        // Nothing opened yet. Allow the brace to appear on the line after the
        // start line, but no later, and not after a self-contained statement.
        let text = line.trim_end();
        let self_contained = text.ends_with(';') || text.ends_with("*/");
        if i > start || self_contained {
            return start + 1;
        }
    }

    (start + 1).min(lines.len())
}
1002
/// Guesses the name introduced by a definition line.
///
/// The first whitespace-separated word (the keyword) is skipped. Each later
/// word has leading `*` and trailing `(`, `{`, `:` and `<` characters
/// removed; the first one that then starts with a letter or `_` is the name.
/// If none qualifies the keyword itself is returned, and `"unknown"` is
/// returned for a line without any words.
fn extract_indent_name(line: &str) -> String {
    let mut words = line.split_whitespace();
    let keyword = match words.next() {
        Some(word) => word,
        None => return "unknown".to_string(),
    };

    words
        .map(|word| {
            word.trim_start_matches('*')
                .trim_end_matches(|c: char| matches!(c, '(' | '{' | ':' | '<'))
        })
        .find(|candidate| {
            candidate
                .chars()
                .next()
                .map_or(false, |c| c.is_alphabetic() || c == '_')
        })
        .unwrap_or(keyword)
        .to_string()
}
1026
#[cfg(test)]
mod tests {
    // Unit tests for language detection, the tree-sitter and fallback symbol
    // listers, and the Chinese/pinyin identifier heuristics. File-based tests
    // write their fixture to a temporary file whose suffix selects the lister.
    use super::*;
    use std::io::Write;

    // Each supported extension maps to its language; Markdown has none.
    #[test]
    fn test_language_detection() {
        assert_eq!(
            LanguageRegistry::detect(Path::new("foo.rs")),
            Some(Lang::Rust)
        );
        assert_eq!(
            LanguageRegistry::detect(Path::new("bar.py")),
            Some(Lang::Python)
        );
        assert_eq!(
            LanguageRegistry::detect(Path::new("baz.js")),
            Some(Lang::JavaScript)
        );
        assert_eq!(
            LanguageRegistry::detect(Path::new("qux.ts")),
            Some(Lang::TypeScript)
        );
        assert_eq!(
            LanguageRegistry::detect(Path::new("main.go")),
            Some(Lang::Go)
        );
        assert_eq!(
            LanguageRegistry::detect(Path::new("App.java")),
            Some(Lang::Java)
        );
        assert_eq!(LanguageRegistry::detect(Path::new("main.c")), Some(Lang::C));
        assert_eq!(
            LanguageRegistry::detect(Path::new("main.cpp")),
            Some(Lang::Cpp)
        );
        assert_eq!(
            LanguageRegistry::detect(Path::new("Program.cs")),
            Some(Lang::CSharp)
        );
        assert_eq!(
            LanguageRegistry::detect(Path::new("index.php")),
            Some(Lang::Php)
        );
        assert_eq!(LanguageRegistry::detect(Path::new("readme.md")), None);
    }

    // Top-level Rust functions and structs are listed by name.
    #[test]
    fn test_list_symbols_rust() {
        let mut searcher = SemanticSearcher::new();
        let source = r#"
pub fn hello() {
    println!("hello");
}

pub struct Point {
    x: f64,
    y: f64,
}

impl Point {
    pub fn new(x: f64, y: f64) -> Self {
        Self { x, y }
    }
}
"#;
        let mut tmp = tempfile::NamedTempFile::with_suffix(".rs").unwrap();
        tmp.write_all(source.as_bytes()).unwrap();

        let symbols = searcher.list_symbols(tmp.path()).unwrap();
        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"hello"), "symbols: {:?}", names);
        assert!(names.contains(&"Point"), "symbols: {:?}", names);
    }

    // Extracting one function returns its body and nothing of its neighbour.
    #[test]
    fn test_extract_symbol_rust() {
        let mut searcher = SemanticSearcher::new();
        let source = r#"pub fn add(a: i32, b: i32) -> i32 {
    a + b
}

pub fn sub(a: i32, b: i32) -> i32 {
    a - b
}
"#;
        let mut tmp = tempfile::NamedTempFile::with_suffix(".rs").unwrap();
        tmp.write_all(source.as_bytes()).unwrap();

        let slice = searcher.extract_symbol(tmp.path(), "add").unwrap();
        assert!(slice.text.contains("a + b"), "text: {}", slice.text);
        assert!(!slice.text.contains("a - b"), "should not contain sub");
    }

    // The skeleton mentions every function and keeps import lines.
    #[test]
    fn test_skeleton_rust() {
        let mut searcher = SemanticSearcher::new();
        let source = r#"use std::io;

pub fn hello() {
    println!("hello");
}

pub fn world() {
    println!("world");
}
"#;
        let mut tmp = tempfile::NamedTempFile::with_suffix(".rs").unwrap();
        tmp.write_all(source.as_bytes()).unwrap();

        let skel = searcher.skeleton(tmp.path()).unwrap();
        assert!(skel.contains("hello"), "skeleton: {}", skel);
        assert!(skel.contains("world"), "skeleton: {}", skel);
        assert!(skel.contains("use std::io"), "skeleton: {}", skel);
    }

    // Python functions and classes are listed by name.
    #[test]
    fn test_list_symbols_python() {
        let mut searcher = SemanticSearcher::new();
        let source = r#"
def greet(name):
    print(f"hello {name}")

class Calculator:
    def add(self, a, b):
        return a + b
"#;
        let mut tmp = tempfile::NamedTempFile::with_suffix(".py").unwrap();
        tmp.write_all(source.as_bytes()).unwrap();

        let symbols = searcher.list_symbols(tmp.path()).unwrap();
        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"greet"), "symbols: {:?}", names);
        assert!(names.contains(&"Calculator"), "symbols: {:?}", names);
    }

    // C# classes, methods and interfaces are listed by name.
    #[test]
    fn test_list_symbols_csharp() {
        let mut searcher = SemanticSearcher::new();
        let source = r#"
class Program {
    Program() {}

    public static void Main(string[] args) {
    }
}

interface IGreeter {
    void Greet();
}
"#;
        let mut tmp = tempfile::NamedTempFile::with_suffix(".cs").unwrap();
        tmp.write_all(source.as_bytes()).unwrap();

        let symbols = searcher.list_symbols(tmp.path()).unwrap();
        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"Program"), "symbols: {:?}", names);
        assert!(names.contains(&"Main"), "symbols: {:?}", names);
        assert!(names.contains(&"IGreeter"), "symbols: {:?}", names);
    }

    // PHP classes, methods, functions and interfaces are listed by name.
    #[test]
    fn test_list_symbols_php() {
        let mut searcher = SemanticSearcher::new();
        let source = r#"
<?php

class Calculator {
    public function add($a, $b) {
        return $a + $b;
    }
}

function greet($name) {
    return "Hello, $name";
}

interface Printable {
    public function print();
}
"#;
        let mut tmp = tempfile::NamedTempFile::with_suffix(".php").unwrap();
        tmp.write_all(source.as_bytes()).unwrap();

        let symbols = searcher.list_symbols(tmp.path()).unwrap();
        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"Calculator"), "php: {:?}", names);
        assert!(names.contains(&"add"), "php: {:?}", names);
        assert!(names.contains(&"greet"), "php: {:?}", names);
        assert!(names.contains(&"Printable"), "php: {:?}", names);
    }

    // A `.txt` file has no detected language, so the indentation heuristic is
    // used; it keeps the trailing `()` in the extracted name.
    #[test]
    fn test_indent_fallback() {
        let mut searcher = SemanticSearcher::new();
        let source = r#"
def hello():
    print("hello")

def world():
    print("world")
"#;
        let mut tmp = tempfile::NamedTempFile::with_suffix(".txt").unwrap();
        tmp.write_all(source.as_bytes()).unwrap();

        let symbols = searcher.list_symbols(tmp.path()).unwrap();
        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        assert!(
            names.contains(&"hello()"),
            "indent fallback symbols: {:?}",
            names
        );
    }

    // Single characters: ideographs are Chinese, ASCII is not.
    #[test]
    fn test_chinese_character_detection() {
        assert!(is_chinese('中'));
        assert!(is_chinese('文'));
        assert!(!is_chinese('a'));
        assert!(!is_chinese('1'));
        assert!(!is_chinese('_'));
    }

    // One ideograph anywhere in the string is enough.
    #[test]
    fn test_contains_chinese() {
        assert!(contains_chinese("用户名"));
        assert!(contains_chinese("hello世界"));
        assert!(!contains_chinese("hello"));
        assert!(!contains_chinese("123"));
    }

    #[test]
    fn test_pinyin_identifier_detection() {
        // Identifiers made up (mostly) of pinyin syllables.
        assert!(is_pinyin_identifier("yonghuMing"));
        assert!(is_pinyin_identifier("dingdanList"));
        assert!(is_pinyin_identifier("zhongguoRen"));
        assert!(is_pinyin_identifier("wenjianMuLu"));

        // English words, the empty string and digits are rejected.
        assert!(!is_pinyin_identifier("hello"));
        assert!(!is_pinyin_identifier("getUser"));
        assert!(!is_pinyin_identifier(""));
        assert!(!is_pinyin_identifier("123"));
    }

    // The `Symbol` helpers agree with the free functions for a Chinese, a
    // pinyin and an English name.
    #[test]
    fn test_symbol_chinese_detection() {
        let sym = Symbol {
            name: "用户名".to_string(),
            start_line: 1,
            end_line: 1,
            start_byte: 0,
            end_byte: 9,
            kind: "variable".to_string(),
        };
        assert!(sym.is_chinese());
        assert!(!sym.is_pinyin());
        assert!(sym.is_chinese_related());

        let sym_pinyin = Symbol {
            name: "yonghuMing".to_string(),
            start_line: 1,
            end_line: 1,
            start_byte: 0,
            end_byte: 10,
            kind: "variable".to_string(),
        };
        assert!(!sym_pinyin.is_chinese());
        assert!(sym_pinyin.is_pinyin());
        assert!(sym_pinyin.is_chinese_related());

        let sym_english = Symbol {
            name: "getUser".to_string(),
            start_line: 1,
            end_line: 1,
            start_byte: 0,
            end_byte: 7,
            kind: "function".to_string(),
        };
        assert!(!sym_english.is_chinese());
        assert!(!sym_english.is_pinyin());
        assert!(!sym_english.is_chinese_related());
    }

    // Top-level assignments to Chinese identifiers are reported by the
    // fallback lister.
    #[test]
    fn test_chinese_variable_extraction() {
        let mut searcher = SemanticSearcher::new();
        let source = r#"用户名 = "张三"
年龄 = 25
def get_user():
    return 用户名
"#;
        let mut tmp = tempfile::NamedTempFile::with_suffix(".txt").unwrap();
        tmp.write_all(source.as_bytes()).unwrap();

        let symbols = searcher.list_symbols(tmp.path()).unwrap();
        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"用户名"), "symbols: {:?}", names);
    }

    #[test]
    fn test_mixed_chinese_english_detection() {
        // Identifiers mixing ASCII and ideographs count as Chinese.
        assert!(contains_chinese("getUser用户名"));
        assert!(contains_chinese("query_订单列表"));
        assert!(contains_chinese("test数据"));
        assert!(contains_chinese("order详情"));

        // The same holds through the `Symbol` helper.
        let sym_mixed1 = Symbol {
            name: "getUser用户名".to_string(),
            start_line: 1,
            end_line: 1,
            start_byte: 0,
            end_byte: 0,
            kind: "variable".to_string(),
        };
        assert!(sym_mixed1.is_chinese_related());

        let sym_mixed2 = Symbol {
            name: "query_订单列表".to_string(),
            start_line: 1,
            end_line: 1,
            start_byte: 0,
            end_byte: 0,
            kind: "variable".to_string(),
        };
        assert!(sym_mixed2.is_chinese_related());

        // Pure ASCII identifiers are not Chinese.
        assert!(!contains_chinese("getUser"));
        assert!(!contains_chinese("queryOrderList"));
    }

    // Mixed ASCII/Chinese variable names are extracted in full.
    #[test]
    fn test_mixed_content_extraction() {
        let mut searcher = SemanticSearcher::new();
        let source = r#"getUser用户名 = "张三"
query_订单列表 = []
test数据 = 42
"#;
        let mut tmp = tempfile::NamedTempFile::with_suffix(".txt").unwrap();
        tmp.write_all(source.as_bytes()).unwrap();

        let symbols = searcher.list_symbols(tmp.path()).unwrap();
        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"getUser用户名"), "symbols: {:?}", names);
        assert!(names.contains(&"query_订单列表"), "symbols: {:?}", names);
        assert!(names.contains(&"test数据"), "symbols: {:?}", names);
    }

    // Chinese variables nested one or two indentation levels deep are still
    // found by the fallback lister.
    #[test]
    fn test_chinese_variable_nested_indent() {
        let mut searcher = SemanticSearcher::new();
        let source = r#"def process():
    用户名 = "张三"
    订单列表 = []
    if True:
        配置项 = "value"
"#;
        let mut tmp = tempfile::NamedTempFile::with_suffix(".txt").unwrap();
        tmp.write_all(source.as_bytes()).unwrap();

        let symbols = searcher.list_symbols(tmp.path()).unwrap();
        let names: Vec<&str> = symbols.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"用户名"), "nested symbols: {:?}", names);
        assert!(names.contains(&"订单列表"), "nested symbols: {:?}", names);
        assert!(names.contains(&"配置项"), "nested symbols: {:?}", names);
    }
}