codeprysm_core/
parser.rs

1//! Tree-Sitter Parser for Code Graph Generation
2//!
3//! This module provides tree-sitter based parsing for extracting code entities
4//! and their relationships from source files.
5//!
6//! ## Supported Languages
7//!
8//! - Python (.py)
9//! - JavaScript (.js, .mjs, .cjs)
10//! - TypeScript (.ts, .tsx)
11//! - Rust (.rs)
12//! - Go (.go)
13//! - C (.c, .h)
14//! - C++ (.cpp, .hpp, .cc, .cxx)
15//! - C# (.cs)
16
17use std::collections::HashMap;
18use std::path::Path;
19use std::sync::OnceLock;
20
21use thiserror::Error;
22use tree_sitter::{Language, Parser, Query, QueryCursor, StreamingIterator, Tree};
23
24// ============================================================================
25// Supported Languages
26// ============================================================================
27
/// Source-code languages this module can parse.
///
/// TypeScript and TSX are kept as separate variants; every other language
/// has exactly one variant.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SupportedLanguage {
    /// Python sources.
    Python,
    /// JavaScript sources.
    JavaScript,
    /// TypeScript sources.
    TypeScript,
    /// TypeScript sources with JSX syntax.
    Tsx,
    /// Rust sources.
    Rust,
    /// Go sources.
    Go,
    /// C sources and headers.
    C,
    /// C++ sources and headers.
    Cpp,
    /// C# sources.
    CSharp,
}
41
42impl SupportedLanguage {
43    /// Get the language name as used in SCM query file names.
44    pub fn as_str(&self) -> &'static str {
45        match self {
46            SupportedLanguage::Python => "python",
47            SupportedLanguage::JavaScript => "javascript",
48            SupportedLanguage::TypeScript => "typescript",
49            SupportedLanguage::Tsx => "typescript", // TSX uses TypeScript queries
50            SupportedLanguage::Rust => "rust",
51            SupportedLanguage::Go => "go",
52            SupportedLanguage::C => "c",
53            SupportedLanguage::Cpp => "cpp",
54            SupportedLanguage::CSharp => "csharp",
55        }
56    }
57
58    /// Get the tree-sitter Language for this language.
59    pub fn tree_sitter_language(&self) -> Language {
60        match self {
61            SupportedLanguage::Python => tree_sitter_python::LANGUAGE.into(),
62            SupportedLanguage::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
63            SupportedLanguage::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
64            SupportedLanguage::Tsx => tree_sitter_typescript::LANGUAGE_TSX.into(),
65            SupportedLanguage::Rust => tree_sitter_rust::LANGUAGE.into(),
66            SupportedLanguage::Go => tree_sitter_go::LANGUAGE.into(),
67            SupportedLanguage::C => tree_sitter_c::LANGUAGE.into(),
68            SupportedLanguage::Cpp => tree_sitter_cpp::LANGUAGE.into(),
69            SupportedLanguage::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
70        }
71    }
72
73    /// Detect language from file extension.
74    ///
75    /// Returns `None` if the extension is not recognized.
76    pub fn from_extension(ext: &str) -> Option<Self> {
77        get_extension_map()
78            .get(ext.to_lowercase().as_str())
79            .copied()
80    }
81
82    /// Detect language from file path.
83    ///
84    /// Returns `None` if the file extension is not recognized.
85    pub fn from_path(path: &Path) -> Option<Self> {
86        path.extension()
87            .and_then(|e| e.to_str())
88            .and_then(Self::from_extension)
89    }
90
91    /// Get all supported file extensions.
92    pub fn all_extensions() -> &'static [&'static str] {
93        &[
94            "py", "js", "mjs", "cjs", "ts", "tsx", "rs", "go", "c", "h", "cpp", "hpp", "cc", "cxx",
95            "cs",
96        ]
97    }
98}
99
100impl std::fmt::Display for SupportedLanguage {
101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102        write!(f, "{}", self.as_str())
103    }
104}
105
106/// Static extension to language mapping.
107static EXTENSION_MAP: OnceLock<HashMap<&'static str, SupportedLanguage>> = OnceLock::new();
108
109fn get_extension_map() -> &'static HashMap<&'static str, SupportedLanguage> {
110    EXTENSION_MAP.get_or_init(|| {
111        let mut map = HashMap::new();
112        // Python
113        map.insert("py", SupportedLanguage::Python);
114        // JavaScript
115        map.insert("js", SupportedLanguage::JavaScript);
116        map.insert("mjs", SupportedLanguage::JavaScript);
117        map.insert("cjs", SupportedLanguage::JavaScript);
118        // TypeScript
119        map.insert("ts", SupportedLanguage::TypeScript);
120        map.insert("tsx", SupportedLanguage::Tsx);
121        // Rust
122        map.insert("rs", SupportedLanguage::Rust);
123        // Go
124        map.insert("go", SupportedLanguage::Go);
125        // C
126        map.insert("c", SupportedLanguage::C);
127        map.insert("h", SupportedLanguage::C);
128        // C++
129        map.insert("cpp", SupportedLanguage::Cpp);
130        map.insert("hpp", SupportedLanguage::Cpp);
131        map.insert("cc", SupportedLanguage::Cpp);
132        map.insert("cxx", SupportedLanguage::Cpp);
133        // C#
134        map.insert("cs", SupportedLanguage::CSharp);
135        map
136    })
137}
138
139// Initialize extension map on module load
140#[doc(hidden)]
141pub fn _init_extension_map() {
142    let _ = get_extension_map();
143}
144
145// ============================================================================
146// Manifest Languages
147// ============================================================================
148
/// Grammars used for manifest (project description) files.
///
/// Kept separate from `SupportedLanguage`: manifests are written in data
/// formats (JSON, TOML, XML, ...) and are queried with dedicated SCM files
/// that pull out component names and dependencies.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum ManifestLanguage {
    /// JSON manifests such as `package.json` and `vcpkg.json`.
    Json,
    /// TOML manifests such as `Cargo.toml` and `pyproject.toml`.
    Toml,
    /// Go module files (`go.mod`).
    GoMod,
    /// XML project files (`.csproj`, `.vbproj`, `.fsproj`).
    Xml,
    /// CMake build files (`CMakeLists.txt`).
    CMake,
}
167
168impl ManifestLanguage {
169    /// Get the language name as used in SCM query file names.
170    ///
171    /// Maps to `{name}-manifest-tags.scm` query files.
172    pub fn as_str(&self) -> &'static str {
173        match self {
174            ManifestLanguage::Json => "json",
175            ManifestLanguage::Toml => "toml",
176            ManifestLanguage::GoMod => "gomod",
177            ManifestLanguage::Xml => "xml",
178            ManifestLanguage::CMake => "cmake",
179        }
180    }
181
182    /// Get the tree-sitter Language for this manifest type.
183    pub fn tree_sitter_language(&self) -> Language {
184        match self {
185            ManifestLanguage::Json => tree_sitter_json::LANGUAGE.into(),
186            ManifestLanguage::Toml => tree_sitter_toml_ng::LANGUAGE.into(),
187            ManifestLanguage::GoMod => tree_sitter_gomod_orchard::LANGUAGE.into(),
188            ManifestLanguage::Xml => tree_sitter_xml::LANGUAGE_XML.into(),
189            ManifestLanguage::CMake => tree_sitter_cmake::LANGUAGE.into(),
190        }
191    }
192
193    /// Detect manifest language from filename.
194    ///
195    /// Returns `None` if the filename is not a recognized manifest file.
196    ///
197    /// # Recognized Manifest Files
198    ///
199    /// | Filename Pattern | Language | Component Type |
200    /// |-----------------|----------|----------------|
201    /// | `package.json` | Json | npm/Node.js |
202    /// | `vcpkg.json` | Json | vcpkg (C/C++) |
203    /// | `Cargo.toml` | Toml | Rust crate |
204    /// | `pyproject.toml` | Toml | Python package |
205    /// | `go.mod` | GoMod | Go module |
206    /// | `*.csproj` | Xml | C# project |
207    /// | `*.vbproj` | Xml | VB.NET project |
208    /// | `*.fsproj` | Xml | F# project |
209    /// | `CMakeLists.txt` | CMake | CMake project |
210    pub fn from_filename(filename: &str) -> Option<Self> {
211        match filename {
212            // JSON manifests
213            "package.json" => Some(ManifestLanguage::Json),
214            "vcpkg.json" => Some(ManifestLanguage::Json),
215            // TOML manifests
216            "Cargo.toml" => Some(ManifestLanguage::Toml),
217            "pyproject.toml" => Some(ManifestLanguage::Toml),
218            // Go module
219            "go.mod" => Some(ManifestLanguage::GoMod),
220            // CMake
221            "CMakeLists.txt" => Some(ManifestLanguage::CMake),
222            // XML-based project files
223            _ => {
224                if filename.ends_with(".csproj")
225                    || filename.ends_with(".vbproj")
226                    || filename.ends_with(".fsproj")
227                {
228                    Some(ManifestLanguage::Xml)
229                } else {
230                    None
231                }
232            }
233        }
234    }
235
236    /// Detect manifest language from file path.
237    ///
238    /// Extracts the filename from the path and calls `from_filename`.
239    pub fn from_path(path: &Path) -> Option<Self> {
240        path.file_name()
241            .and_then(|n| n.to_str())
242            .and_then(Self::from_filename)
243    }
244
245    /// Check if a file path is a manifest file.
246    pub fn is_manifest_file(path: &Path) -> bool {
247        Self::from_path(path).is_some()
248    }
249
250    /// Get all recognized manifest filenames (exact matches).
251    pub fn exact_filenames() -> &'static [&'static str] {
252        &[
253            "package.json",
254            "vcpkg.json",
255            "Cargo.toml",
256            "pyproject.toml",
257            "go.mod",
258            "CMakeLists.txt",
259        ]
260    }
261
262    /// Get all recognized manifest file extensions.
263    pub fn manifest_extensions() -> &'static [&'static str] {
264        &["csproj", "vbproj", "fsproj"]
265    }
266}
267
268impl std::fmt::Display for ManifestLanguage {
269    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
270        write!(f, "{}", self.as_str())
271    }
272}
273
274// ============================================================================
275// Parser Errors
276// ============================================================================
277
278/// Errors that can occur during parsing.
279#[derive(Debug, Error)]
280pub enum ParserError {
281    /// Failed to create parser
282    #[error("Failed to create parser: {0}")]
283    ParserCreation(String),
284
285    /// Failed to set language
286    #[error("Failed to set language: {0}")]
287    LanguageSet(String),
288
289    /// Failed to parse source code
290    #[error("Failed to parse source code")]
291    ParseFailed,
292
293    /// Failed to load query file
294    #[error("Failed to load query file: {0}")]
295    QueryLoad(String),
296
297    /// Failed to compile query
298    #[error("Failed to compile query: {0}")]
299    QueryCompile(String),
300
301    /// Unsupported language
302    #[error("Unsupported language for file: {0}")]
303    UnsupportedLanguage(String),
304
305    /// IO error
306    #[error("IO error: {0}")]
307    Io(#[from] std::io::Error),
308}
309
310// ============================================================================
311// Code Parser
312// ============================================================================
313
314/// A tree-sitter based code parser with query support.
315pub struct CodeParser {
316    parser: Parser,
317    language: SupportedLanguage,
318}
319
320impl CodeParser {
321    /// Create a new parser for the specified language.
322    pub fn new(language: SupportedLanguage) -> Result<Self, ParserError> {
323        let mut parser = Parser::new();
324        parser
325            .set_language(&language.tree_sitter_language())
326            .map_err(|e| ParserError::LanguageSet(e.to_string()))?;
327
328        Ok(Self { parser, language })
329    }
330
331    /// Create a parser for the given file path.
332    ///
333    /// Detects language from file extension.
334    pub fn for_path(path: &Path) -> Result<Self, ParserError> {
335        let language = SupportedLanguage::from_path(path)
336            .ok_or_else(|| ParserError::UnsupportedLanguage(path.display().to_string()))?;
337        Self::new(language)
338    }
339
340    /// Get the language this parser is configured for.
341    pub fn language(&self) -> SupportedLanguage {
342        self.language
343    }
344
345    /// Parse source code into a syntax tree.
346    pub fn parse(&mut self, source: &str) -> Result<Tree, ParserError> {
347        self.parser
348            .parse(source, None)
349            .ok_or(ParserError::ParseFailed)
350    }
351
352    /// Parse source code with an existing tree for incremental parsing.
353    pub fn parse_with_old_tree(
354        &mut self,
355        source: &str,
356        old_tree: Option<&Tree>,
357    ) -> Result<Tree, ParserError> {
358        self.parser
359            .parse(source, old_tree)
360            .ok_or(ParserError::ParseFailed)
361    }
362}
363
364// ============================================================================
365// Query Manager
366// ============================================================================
367
368/// Manages tree-sitter queries for extracting code entities.
369pub struct QueryManager {
370    /// Base query for tag extraction
371    query: Query,
372    /// Language this query is for
373    language: SupportedLanguage,
374}
375
376impl QueryManager {
377    /// Create a new query manager from SCM query source.
378    pub fn new(language: SupportedLanguage, query_source: &str) -> Result<Self, ParserError> {
379        let ts_language = language.tree_sitter_language();
380        let query = Query::new(&ts_language, query_source)
381            .map_err(|e| ParserError::QueryCompile(format!("{:?}", e)))?;
382
383        Ok(Self { query, language })
384    }
385
386    /// Load query from a file path.
387    pub fn from_file(language: SupportedLanguage, path: &Path) -> Result<Self, ParserError> {
388        let query_source = std::fs::read_to_string(path)?;
389        Self::new(language, &query_source)
390    }
391
392    /// Load query from the default queries directory.
393    ///
394    /// Looks for `{language}-tags.scm` in the queries directory and
395    /// concatenates any overlay files from `overlays/{language}-*.scm`.
396    pub fn from_queries_dir(
397        language: SupportedLanguage,
398        queries_dir: &Path,
399    ) -> Result<Self, ParserError> {
400        let lang_str = language.as_str();
401        let base_path = queries_dir.join(format!("{}-tags.scm", lang_str));
402
403        if !base_path.exists() {
404            return Err(ParserError::QueryLoad(format!(
405                "Query file not found: {}",
406                base_path.display()
407            )));
408        }
409
410        // Load base query
411        let mut query_source = std::fs::read_to_string(&base_path)?;
412
413        // Load and concatenate overlay files
414        let overlays_dir = queries_dir.join("overlays");
415        if overlays_dir.exists() {
416            if let Ok(entries) = std::fs::read_dir(&overlays_dir) {
417                let mut overlay_files: Vec<_> = entries
418                    .filter_map(|e| e.ok())
419                    .filter(|e| {
420                        e.file_name().to_str().is_some_and(|name| {
421                            name.starts_with(&format!("{}-", lang_str)) && name.ends_with(".scm")
422                        })
423                    })
424                    .collect();
425
426                // Sort for deterministic ordering
427                overlay_files.sort_by_key(|e| e.file_name());
428
429                for entry in overlay_files {
430                    if let Ok(overlay_source) = std::fs::read_to_string(entry.path()) {
431                        query_source.push_str("\n\n");
432                        query_source.push_str(&overlay_source);
433                    }
434                }
435            }
436        }
437
438        Self::new(language, &query_source)
439    }
440
441    /// Load query from embedded queries (compiled into the binary).
442    ///
443    /// This is the preferred method for production use as it doesn't require
444    /// external query files.
445    pub fn from_embedded(language: SupportedLanguage) -> Result<Self, ParserError> {
446        let query_source = crate::embedded_queries::get_query(language).ok_or_else(|| {
447            ParserError::QueryLoad(format!(
448                "No embedded query available for language: {:?}",
449                language
450            ))
451        })?;
452        Self::new(language, &query_source)
453    }
454
455    /// Get the underlying tree-sitter query.
456    pub fn query(&self) -> &Query {
457        &self.query
458    }
459
460    /// Get the language this query is for.
461    pub fn language(&self) -> SupportedLanguage {
462        self.language
463    }
464
465    /// Get the capture names defined in this query.
466    pub fn capture_names(&self) -> &[&str] {
467        self.query.capture_names()
468    }
469
470    /// Get the capture index for a capture name.
471    pub fn capture_index(&self, name: &str) -> Option<u32> {
472        self.query.capture_index_for_name(name)
473    }
474}
475
476// ============================================================================
477// Extracted Tag
478// ============================================================================
479
/// One capture produced by running a tree-sitter query over source code.
#[derive(Clone, Debug)]
pub struct ExtractedTag {
    /// Capture name from the query (e.g. "definition.callable.function").
    pub tag: String,
    /// Text of the captured node (the entity name).
    pub name: String,
    /// First line of the captured node (0-indexed).
    pub start_line: usize,
    /// Last line of the captured node (0-indexed).
    pub end_line: usize,
    /// Column where the captured node starts (0-indexed).
    pub start_col: usize,
    /// Column where the captured node ends (0-indexed).
    pub end_col: usize,
    /// Byte offset where the captured node starts.
    pub start_byte: usize,
    /// Byte offset where the captured node ends.
    pub end_byte: usize,
    /// First line of the captured node's parent; set for `name.` captures
    /// so containment can use the enclosing definition's range.
    pub parent_start_line: Option<usize>,
    /// Last line of the captured node's parent; set for `name.` captures
    /// so containment can use the enclosing definition's range.
    pub parent_end_line: Option<usize>,
    /// Rust only: the type an enclosing `impl` block implements, so that
    /// methods in `impl Foo { }` can be associated with struct `Foo`.
    pub impl_target: Option<String>,
}

impl ExtractedTag {
    /// Returns the start line as a 1-indexed number for display.
    pub fn line_number(&self) -> usize {
        1 + self.start_line
    }

    /// Returns the end line as a 1-indexed number for display.
    pub fn end_line_number(&self) -> usize {
        1 + self.end_line
    }

    /// Returns the 0-indexed line where containment starts.
    ///
    /// The parent's start line wins when present; otherwise the tag's own
    /// start line is used.
    pub fn containment_start_line(&self) -> usize {
        match self.parent_start_line {
            Some(line) => line,
            None => self.start_line,
        }
    }

    /// Returns the 0-indexed line where containment ends.
    ///
    /// The parent's end line wins when present; otherwise the tag's own end
    /// line is used.
    pub fn containment_end_line(&self) -> usize {
        match self.parent_end_line {
            Some(line) => line,
            None => self.end_line,
        }
    }
}
531
532// ============================================================================
533// Containment Context
534// ============================================================================
535
/// One open container recorded on the containment stack.
#[derive(Clone, Debug)]
pub struct ContainmentEntry {
    /// Full node ID of the container.
    pub node_id: String,
    /// Kind of container ("Container" or "Callable").
    pub node_type: String,
    /// First line of the container (0-indexed).
    pub start_line: usize,
    /// Last line of the container (0-indexed).
    pub end_line: usize,
    /// Name of the container entity.
    pub entity_name: String,
}

/// Stack of containers that are open at the current source position.
///
/// Entries are pushed as definitions are encountered and popped by
/// [`update`](ContainmentContext::update) once their line range has been
/// passed, so the top of the stack is the innermost enclosing entity.
///
/// # Example
///
/// ```
/// use codeprysm_core::parser::ContainmentContext;
///
/// let mut ctx = ContainmentContext::new();
///
/// // A class spanning lines 10-50
/// ctx.push_container("file.py:MyClass".to_string(), "Container".to_string(), 10, 50, "MyClass".to_string());
///
/// // A method inside it, spanning lines 15-25
/// ctx.update(15);
/// ctx.push_container("file.py:MyClass:method".to_string(), "Callable".to_string(), 15, 25, "method".to_string());
///
/// assert_eq!(ctx.get_containment_path(), vec!["MyClass", "method"]);
///
/// // Moving past the method's last line pops it
/// ctx.update(30);
/// assert_eq!(ctx.get_containment_path(), vec!["MyClass"]);
/// ```
#[derive(Default, Debug)]
pub struct ContainmentContext {
    /// Open containers, outermost first.
    stack: Vec<ContainmentEntry>,
}

impl ContainmentContext {
    /// Creates a context with no open containers.
    pub fn new() -> Self {
        Self::default()
    }

    /// Pops every container on top of the stack that ended before
    /// `current_line`.
    ///
    /// A container whose `end_line` equals `current_line` stays open. Call
    /// this before handling each new entity so the stack matches the
    /// current position in the file.
    pub fn update(&mut self, current_line: usize) {
        while self
            .stack
            .last()
            .is_some_and(|top| top.end_line < current_line)
        {
            self.stack.pop();
        }
    }

    /// Opens a new container on top of the stack.
    ///
    /// Only the node types `"Container"` and `"Callable"` are recorded; any
    /// other `node_type` makes this call a no-op.
    pub fn push_container(
        &mut self,
        node_id: String,
        node_type: String,
        start_line: usize,
        end_line: usize,
        entity_name: String,
    ) {
        if !matches!(node_type.as_str(), "Container" | "Callable") {
            return;
        }

        let entry = ContainmentEntry {
            node_id,
            node_type,
            start_line,
            end_line,
            entity_name,
        };
        self.stack.push(entry);
    }

    /// Returns the node ID of the innermost open container.
    ///
    /// Returns `None` at file level (nothing is open).
    pub fn get_current_parent_id(&self) -> Option<&str> {
        let innermost = self.stack.last()?;
        Some(innermost.node_id.as_str())
    }

    /// Returns the entity names of all open containers, outermost first.
    pub fn get_containment_path(&self) -> Vec<&str> {
        let mut path = Vec::with_capacity(self.stack.len());
        for entry in &self.stack {
            path.push(entry.entity_name.as_str());
        }
        path
    }

    /// Reports whether a container with this node ID is currently open.
    pub fn is_container_active(&self, node_id: &str) -> bool {
        self.stack
            .iter()
            .map(|entry| entry.node_id.as_str())
            .any(|open_id| open_id == node_id)
    }

    /// Returns how many containers are currently open.
    pub fn depth(&self) -> usize {
        self.stack.len()
    }

    /// Reports whether no container is open (file level).
    pub fn is_empty(&self) -> bool {
        self.stack.is_empty()
    }

    /// Closes every open container (typically called between files).
    pub fn clear(&mut self) {
        self.stack.clear();
    }

    /// Returns the open containers, outermost first.
    pub fn stack(&self) -> &[ContainmentEntry] {
        &self.stack
    }
}

/// Renders the stack as `ContainmentContext(name(type) → name(type))`, or
/// `ContainmentContext(empty)` when nothing is open.
impl std::fmt::Display for ContainmentContext {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if self.stack.is_empty() {
            return f.write_str("ContainmentContext(empty)");
        }

        let rendered = self
            .stack
            .iter()
            .map(|entry| format!("{}({})", entry.entity_name, entry.node_type))
            .collect::<Vec<String>>()
            .join(" → ");
        write!(f, "ContainmentContext({})", rendered)
    }
}
681
682// ============================================================================
683// Node ID Generation
684// ============================================================================
685
/// Builds the hierarchical node ID of an entity.
///
/// The ID is the file path, each containing entity and the entity name
/// joined with `:`, i.e. `file:Container1:Container2:Entity`. Anonymous
/// entities (names of the form `<...>`) additionally get `:<line>` appended
/// when a line number is supplied.
///
/// # Arguments
///
/// * `file_path` - Relative file path (e.g., "src/models.py")
/// * `containment_stack` - Names of the enclosing entities, outermost first
/// * `entity_name` - Name of the entity
/// * `line` - Line number, used only for anonymous entities (e.g., lambdas)
///
/// # Examples
///
/// ```
/// use codeprysm_core::parser::generate_node_id;
///
/// assert_eq!(
///     generate_node_id("src/models.py", &["User"], "save", None),
///     "src/models.py:User:save"
/// );
/// assert_eq!(
///     generate_node_id("src/utils.py", &["process"], "<lambda>", Some(42)),
///     "src/utils.py:process:<lambda>:42"
/// );
/// ```
pub fn generate_node_id(
    file_path: &str,
    containment_stack: &[&str],
    entity_name: &str,
    line: Option<usize>,
) -> String {
    let is_anonymous = entity_name.starts_with('<') && entity_name.ends_with('>');

    // file[:container]*:entity
    let mut id = String::from(file_path);
    for segment in containment_stack
        .iter()
        .chain(std::iter::once(&entity_name))
    {
        id.push(':');
        id.push_str(segment);
    }

    // Anonymous entities are disambiguated by their line number.
    match line {
        Some(line_num) if is_anonymous => format!("{}:{}", id, line_num),
        _ => id,
    }
}
731
/// Parse a node ID into its components.
///
/// This is the inverse of `generate_node_id`: the ID is split on `:`, the
/// first segment is the file path, the last is the entity name and
/// everything in between is the containment stack.
///
/// IDs of anonymous entities end in `<name>:<line>` (for example
/// `src/utils.py:process:<lambda>:42`). For those, the trailing line number
/// is recognised and dropped so that the entity name is `<lambda>` rather
/// than `42`; the line number itself is not returned.
///
/// Because `:` is the separator, a file path that itself contains `:` cannot
/// be parsed back correctly.
///
/// # Returns
///
/// Tuple of (file_path, containment_stack, entity_name)
///
/// # Errors
///
/// Returns `None` if the node ID contains no `:` separator.
pub fn parse_node_id(node_id: &str) -> Option<(&str, Vec<&str>, &str)> {
    let parts: Vec<&str> = node_id.split(':').collect();
    if parts.len() < 2 {
        return None;
    }

    // Detect the `<name>:<line>` tail. At least three segments are required
    // so that the file path itself is never mistaken for the anonymous name.
    let has_line_suffix = match parts.as_slice() {
        [_, .., anon, line] => {
            anon.starts_with('<')
                && anon.ends_with('>')
                && !line.is_empty()
                && line.bytes().all(|b| b.is_ascii_digit())
        }
        _ => false,
    };
    // Exclusive end of the segments that make up file/containment/entity.
    let end = parts.len() - usize::from(has_line_suffix);

    let file_path = parts[0];
    let entity_name = parts[end - 1];
    let containment = parts[1..end - 1].to_vec();

    Some((file_path, containment, entity_name))
}
755
756// ============================================================================
757// Tag Extractor
758// ============================================================================
759
760/// Extracts tags from source code using tree-sitter queries.
761pub struct TagExtractor {
762    parser: CodeParser,
763    query_manager: QueryManager,
764}
765
766impl TagExtractor {
767    /// Create a new tag extractor for the specified language.
768    pub fn new(language: SupportedLanguage, query_source: &str) -> Result<Self, ParserError> {
769        let parser = CodeParser::new(language)?;
770        let query_manager = QueryManager::new(language, query_source)?;
771        Ok(Self {
772            parser,
773            query_manager,
774        })
775    }
776
777    /// Create a tag extractor using queries from a directory.
778    pub fn from_queries_dir(
779        language: SupportedLanguage,
780        queries_dir: &Path,
781    ) -> Result<Self, ParserError> {
782        let parser = CodeParser::new(language)?;
783        let query_manager = QueryManager::from_queries_dir(language, queries_dir)?;
784        Ok(Self {
785            parser,
786            query_manager,
787        })
788    }
789
790    /// Create a tag extractor using embedded queries.
791    ///
792    /// This is the preferred method for production use as it doesn't require
793    /// external query files.
794    pub fn from_embedded(language: SupportedLanguage) -> Result<Self, ParserError> {
795        let parser = CodeParser::new(language)?;
796        let query_manager = QueryManager::from_embedded(language)?;
797        Ok(Self {
798            parser,
799            query_manager,
800        })
801    }
802
803    /// Get the language this extractor is configured for.
804    pub fn language(&self) -> SupportedLanguage {
805        self.parser.language()
806    }
807
808    /// Extract tags from source code.
809    pub fn extract(&mut self, source: &str) -> Result<Vec<ExtractedTag>, ParserError> {
810        let tree = self.parser.parse(source)?;
811        let source_bytes = source.as_bytes();
812        let is_rust = self.parser.language() == SupportedLanguage::Rust;
813
814        let mut tags = Vec::new();
815        let mut cursor = QueryCursor::new();
816        let query = self.query_manager.query();
817        let capture_names = query.capture_names();
818
819        let mut matches = cursor.matches(query, tree.root_node(), source_bytes);
820        while let Some(match_) = matches.next() {
821            for capture in match_.captures {
822                let capture_name = &capture_names[capture.index as usize];
823                let node = capture.node;
824
825                // Get the text of the captured node
826                let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
827
828                // For name. captures (e.g., @name.definition.X), get the parent node's
829                // line range for proper containment tracking. The parent is the actual
830                // definition node (class_definition, function_definition, etc.).
831                let (parent_start_line, parent_end_line) = if capture_name.starts_with("name.") {
832                    if let Some(parent) = node.parent() {
833                        (
834                            Some(parent.start_position().row),
835                            Some(parent.end_position().row),
836                        )
837                    } else {
838                        (None, None)
839                    }
840                } else {
841                    (None, None)
842                };
843
844                // For Rust methods inside impl blocks, find the impl target type
845                let impl_target = if is_rust && capture_name.contains("callable.method") {
846                    find_impl_target(&node, source_bytes)
847                } else {
848                    None
849                };
850
851                tags.push(ExtractedTag {
852                    tag: (*capture_name).to_string(),
853                    name: text,
854                    start_line: node.start_position().row,
855                    end_line: node.end_position().row,
856                    start_col: node.start_position().column,
857                    end_col: node.end_position().column,
858                    start_byte: node.start_byte(),
859                    end_byte: node.end_byte(),
860                    parent_start_line,
861                    parent_end_line,
862                    impl_target,
863                });
864            }
865        }
866
867        Ok(tags)
868    }
869}
870
871/// Find the impl target type for a node inside a Rust impl block.
872/// Traverses up the AST to find an impl_item and extracts its type identifier.
873///
874/// For `impl Trait for Type`, returns "Type" (not "Trait").
875/// For `impl Type`, returns "Type".
876fn find_impl_target(node: &tree_sitter::Node, source: &[u8]) -> Option<String> {
877    let mut current = node.parent();
878    while let Some(parent) = current {
879        if parent.kind() == "impl_item" {
880            // FIRST: Try to get the "type" field specifically
881            // This correctly handles `impl Trait for Type` (returns Type, not Trait)
882            if let Some(type_child) = parent.child_by_field_name("type") {
883                // Handle generic_type: `impl<T> Foo<T>` or `impl Trait for Foo<T>`
884                if type_child.kind() == "generic_type" {
885                    for child in type_child.children(&mut type_child.walk()) {
886                        if child.kind() == "type_identifier" {
887                            if let Ok(type_name) = child.utf8_text(source) {
888                                return Some(type_name.to_string());
889                            }
890                        }
891                    }
892                } else if let Ok(type_name) = type_child.utf8_text(source) {
893                    return Some(type_name.to_string());
894                }
895            }
896
897            // FALLBACK: For simple `impl Type` without trait, find first type_identifier
898            // This is only reached if child_by_field_name("type") didn't find anything
899            for child in parent.children(&mut parent.walk()) {
900                if child.kind() == "type_identifier" {
901                    if let Ok(type_name) = child.utf8_text(source) {
902                        return Some(type_name.to_string());
903                    }
904                }
905            }
906            break;
907        }
908        current = parent.parent();
909    }
910    None
911}
912
913// ============================================================================
914// Metadata Extraction
915// ============================================================================
916
917use crate::graph::NodeMetadata;
918use tree_sitter::Node;
919
920/// Extracts metadata from AST nodes for a specific language.
921pub struct MetadataExtractor {
922    language: SupportedLanguage,
923}
924
925impl MetadataExtractor {
926    /// Create a new metadata extractor for the specified language.
927    pub fn new(language: SupportedLanguage) -> Self {
928        Self { language }
929    }
930
931    /// Extract metadata from an AST node and its text.
932    ///
933    /// # Arguments
934    ///
935    /// * `node` - The tree-sitter AST node
936    /// * `source` - The source code bytes
937    ///
938    /// # Returns
939    ///
940    /// A `NodeMetadata` struct with extracted metadata.
941    pub fn extract(&self, node: &Node, source: &[u8]) -> NodeMetadata {
942        let node_text = node.utf8_text(source).unwrap_or("");
943
944        match self.language {
945            SupportedLanguage::Python => extract_python_metadata(node, node_text, source),
946            SupportedLanguage::JavaScript
947            | SupportedLanguage::TypeScript
948            | SupportedLanguage::Tsx => extract_typescript_metadata(node, node_text, source),
949            SupportedLanguage::Go => extract_go_metadata(node, source),
950            SupportedLanguage::CSharp => extract_csharp_metadata(node, node_text, source),
951            SupportedLanguage::Rust => extract_rust_metadata(node, node_text, source),
952            SupportedLanguage::C | SupportedLanguage::Cpp => {
953                extract_c_cpp_metadata(node, node_text, source)
954            }
955        }
956    }
957
958    /// Extract metadata from an entity name (for convention-based visibility).
959    ///
960    /// Some languages like Python and Go use naming conventions for visibility.
961    /// This method extracts metadata from the entity name alone.
962    pub fn extract_from_name(&self, name: &str) -> NodeMetadata {
963        let mut metadata = NodeMetadata::default();
964
965        match self.language {
966            SupportedLanguage::Python => {
967                // Python visibility by naming convention
968                // Dunder methods (__init__, __str__, etc.) are public
969                // Double underscore without trailing __ is private (name mangling)
970                // Single underscore is protected (convention)
971                if name.starts_with("__") && !name.ends_with("__") {
972                    metadata.visibility = Some("private".to_string());
973                } else if name.starts_with('_') && !name.starts_with("__") {
974                    metadata.visibility = Some("protected".to_string());
975                } else {
976                    metadata.visibility = Some("public".to_string());
977                }
978            }
979            SupportedLanguage::Go => {
980                // Go: uppercase first letter = exported (public)
981                if let Some(first_char) = name.chars().next() {
982                    if first_char.is_uppercase() {
983                        metadata.visibility = Some("public".to_string());
984                    } else {
985                        metadata.visibility = Some("private".to_string());
986                    }
987                }
988            }
989            _ => {}
990        }
991
992        metadata
993    }
994}
995
996/// Extract Python-specific metadata.
997fn extract_python_metadata(node: &Node, node_text: &str, source: &[u8]) -> NodeMetadata {
998    let mut metadata = NodeMetadata::default();
999
1000    // Check for async keyword
1001    if node_text.contains("async def") || node_text.contains("async with") {
1002        metadata.is_async = Some(true);
1003    }
1004
1005    // Also check for async child node (more reliable)
1006    if has_child_of_kind(node, "async") {
1007        metadata.is_async = Some(true);
1008    }
1009
1010    // Python visibility by naming convention
1011    // (handled by extract_from_name, but check node text too)
1012    if let Some(name) = find_identifier_text(node, source) {
1013        // Dunder methods (__init__, __str__, etc.) are public
1014        // Double underscore without trailing __ is private (name mangling)
1015        // Single underscore is protected (convention)
1016        if name.starts_with("__") && !name.ends_with("__") {
1017            metadata.visibility = Some("private".to_string());
1018        } else if name.starts_with('_') && !name.starts_with("__") {
1019            metadata.visibility = Some("protected".to_string());
1020        } else {
1021            metadata.visibility = Some("public".to_string());
1022        }
1023    }
1024
1025    // Extract decorators
1026    let decorators = extract_decorators(node, source, "@");
1027    if !decorators.is_empty() {
1028        metadata.decorators = Some(decorators);
1029    }
1030
1031    metadata
1032}
1033
1034/// Extract TypeScript/JavaScript-specific metadata.
1035fn extract_typescript_metadata(node: &Node, node_text: &str, source: &[u8]) -> NodeMetadata {
1036    let mut metadata = NodeMetadata::default();
1037
1038    // Check for async by examining child nodes
1039    let node_kind = node.kind();
1040
1041    if node_kind == "variable_declarator" || node_kind == "lexical_declaration" {
1042        // For arrow functions, look for arrow_function child with async
1043        if let Some(arrow_fn) = find_child_of_kind(node, "arrow_function") {
1044            if has_child_of_kind(&arrow_fn, "async") {
1045                metadata.is_async = Some(true);
1046            }
1047        }
1048    } else {
1049        // For other nodes (method_definition, function_declaration), check direct children
1050        if has_child_of_kind(node, "async") {
1051            metadata.is_async = Some(true);
1052        }
1053    }
1054
1055    // Extract visibility from modifiers
1056    if node_text.contains("private ") {
1057        metadata.visibility = Some("private".to_string());
1058    } else if node_text.contains("protected ") {
1059        metadata.visibility = Some("protected".to_string());
1060    } else if node_text.contains("public ") {
1061        metadata.visibility = Some("public".to_string());
1062    }
1063
1064    // Check for static
1065    if node_text.contains("static ") || has_child_of_kind(node, "static") {
1066        metadata.is_static = Some(true);
1067    }
1068
1069    // Check for abstract
1070    if node_text.contains("abstract ") || has_child_of_kind(node, "abstract") {
1071        metadata.is_abstract = Some(true);
1072    }
1073
1074    // Extract decorators (TypeScript uses @decorator syntax)
1075    let decorators = extract_decorators(node, source, "@");
1076    if !decorators.is_empty() {
1077        metadata.decorators = Some(decorators);
1078    }
1079
1080    metadata
1081}
1082
1083/// Extract Go-specific metadata.
1084fn extract_go_metadata(node: &Node, source: &[u8]) -> NodeMetadata {
1085    let mut metadata = NodeMetadata::default();
1086
1087    // Go uses naming convention for visibility
1088    // Uppercase first letter = exported (public), lowercase = unexported (private)
1089    let identifier_kinds = ["identifier", "field_identifier", "type_identifier"];
1090
1091    for identifier_kind in &identifier_kinds {
1092        if let Some(child) = find_child_of_kind(node, identifier_kind) {
1093            if let Ok(name) = child.utf8_text(source) {
1094                if let Some(first_char) = name.chars().next() {
1095                    if first_char.is_uppercase() {
1096                        metadata.visibility = Some("public".to_string());
1097                    } else {
1098                        metadata.visibility = Some("private".to_string());
1099                    }
1100                    break;
1101                }
1102            }
1103        }
1104    }
1105
1106    metadata
1107}
1108
1109/// Extract C#-specific metadata.
1110fn extract_csharp_metadata(_node: &Node, node_text: &str, _source: &[u8]) -> NodeMetadata {
1111    let mut metadata = NodeMetadata::default();
1112
1113    // Extract visibility
1114    if node_text.contains("private ") {
1115        metadata.visibility = Some("private".to_string());
1116    } else if node_text.contains("protected ") {
1117        metadata.visibility = Some("protected".to_string());
1118    } else if node_text.contains("public ") {
1119        metadata.visibility = Some("public".to_string());
1120    } else if node_text.contains("internal ") {
1121        metadata.visibility = Some("internal".to_string());
1122    }
1123
1124    // Check for static
1125    if node_text.contains("static ") {
1126        metadata.is_static = Some(true);
1127    }
1128
1129    // Check for abstract
1130    if node_text.contains("abstract ") {
1131        metadata.is_abstract = Some(true);
1132    }
1133
1134    // Check for virtual
1135    if node_text.contains("virtual ") {
1136        metadata.is_virtual = Some(true);
1137    }
1138
1139    // Check for async
1140    if node_text.contains("async ") {
1141        metadata.is_async = Some(true);
1142    }
1143
1144    // C# modifiers
1145    let mut modifiers = Vec::new();
1146    if node_text.contains("sealed ") {
1147        modifiers.push("sealed".to_string());
1148    }
1149    if node_text.contains("override ") {
1150        modifiers.push("override".to_string());
1151    }
1152    if node_text.contains("readonly ") {
1153        modifiers.push("readonly".to_string());
1154    }
1155    if node_text.contains("new ") {
1156        modifiers.push("new".to_string());
1157    }
1158    if !modifiers.is_empty() {
1159        metadata.modifiers = Some(modifiers);
1160    }
1161
1162    // Note: C# attributes are captured by csharp-tags.scm @decorator tag
1163    // and would be processed by higher-level graph construction logic
1164
1165    metadata
1166}
1167
1168/// Extract Rust-specific metadata.
1169fn extract_rust_metadata(node: &Node, node_text: &str, source: &[u8]) -> NodeMetadata {
1170    let mut metadata = NodeMetadata::default();
1171
1172    // Extract visibility
1173    if node_text.contains("pub ") || has_child_of_kind(node, "visibility_modifier") {
1174        metadata.visibility = Some("public".to_string());
1175    } else {
1176        metadata.visibility = Some("private".to_string());
1177    }
1178
1179    // Check for async
1180    if node_text.contains("async ") || has_child_of_kind(node, "async") {
1181        metadata.is_async = Some(true);
1182    }
1183
1184    // Rust modifiers
1185    let mut modifiers = Vec::new();
1186    if node_text.contains("const ") {
1187        modifiers.push("const".to_string());
1188    }
1189    if node_text.contains("mut ") {
1190        modifiers.push("mut".to_string());
1191    }
1192    if node_text.contains("unsafe ") {
1193        modifiers.push("unsafe".to_string());
1194    }
1195    if node_text.contains("extern ") {
1196        modifiers.push("extern".to_string());
1197    }
1198    if !modifiers.is_empty() {
1199        metadata.modifiers = Some(modifiers);
1200    }
1201
1202    // Extract attributes (Rust uses #[attr] syntax)
1203    let attributes = extract_rust_attributes(node, source);
1204    if !attributes.is_empty() {
1205        metadata.decorators = Some(attributes);
1206    }
1207
1208    metadata
1209}
1210
1211/// Extract C/C++-specific metadata.
1212fn extract_c_cpp_metadata(_node: &Node, node_text: &str, _source: &[u8]) -> NodeMetadata {
1213    let mut metadata = NodeMetadata::default();
1214
1215    // C++ visibility
1216    if node_text.contains("private:") || node_text.contains("private ") {
1217        metadata.visibility = Some("private".to_string());
1218    } else if node_text.contains("protected:") || node_text.contains("protected ") {
1219        metadata.visibility = Some("protected".to_string());
1220    } else if node_text.contains("public:") || node_text.contains("public ") {
1221        metadata.visibility = Some("public".to_string());
1222    }
1223
1224    // Check for static
1225    if node_text.contains("static ") {
1226        metadata.is_static = Some(true);
1227    }
1228
1229    // Check for virtual
1230    if node_text.contains("virtual ") {
1231        metadata.is_virtual = Some(true);
1232    }
1233
1234    // C++ modifiers
1235    let mut modifiers = Vec::new();
1236    if node_text.contains("const ") {
1237        modifiers.push("const".to_string());
1238    }
1239    if node_text.contains("inline ") {
1240        modifiers.push("inline".to_string());
1241    }
1242    if node_text.contains("extern ") {
1243        modifiers.push("extern".to_string());
1244    }
1245    if node_text.contains("constexpr ") {
1246        modifiers.push("constexpr".to_string());
1247    }
1248    if node_text.contains("override") {
1249        modifiers.push("override".to_string());
1250    }
1251    if node_text.contains("final") {
1252        modifiers.push("final".to_string());
1253    }
1254    if node_text.contains("noexcept") {
1255        modifiers.push("noexcept".to_string());
1256    }
1257    if !modifiers.is_empty() {
1258        metadata.modifiers = Some(modifiers);
1259    }
1260
1261    metadata
1262}
1263
1264// ============================================================================
1265// Helper Functions for Metadata Extraction
1266// ============================================================================
1267
1268/// Check if a node has a child of the specified kind.
1269fn has_child_of_kind(node: &Node, kind: &str) -> bool {
1270    let mut cursor = node.walk();
1271    for child in node.children(&mut cursor) {
1272        if child.kind() == kind {
1273            return true;
1274        }
1275    }
1276    false
1277}
1278
1279/// Find the first child of a specific kind.
1280fn find_child_of_kind<'a>(node: &'a Node, kind: &str) -> Option<Node<'a>> {
1281    let mut cursor = node.walk();
1282    let result = node
1283        .children(&mut cursor)
1284        .find(|child| child.kind() == kind);
1285    result
1286}
1287
1288/// Find identifier text from a node (looks for identifier child nodes).
1289fn find_identifier_text<'a>(node: &Node, source: &'a [u8]) -> Option<&'a str> {
1290    let mut cursor = node.walk();
1291    for child in node.children(&mut cursor) {
1292        let kind = child.kind();
1293        if kind == "identifier" || kind == "name" {
1294            return child.utf8_text(source).ok();
1295        }
1296    }
1297    None
1298}
1299
1300/// Extract decorators/attributes from a node.
1301///
1302/// Looks for sibling decorator nodes that precede the given node.
1303fn extract_decorators(node: &Node, source: &[u8], prefix: &str) -> Vec<String> {
1304    let mut decorators = Vec::new();
1305
1306    // Look for decorator nodes in the parent's children that precede this node
1307    if let Some(parent) = node.parent() {
1308        let mut cursor = parent.walk();
1309        for sibling in parent.children(&mut cursor) {
1310            // Stop when we reach our node
1311            if sibling.id() == node.id() {
1312                break;
1313            }
1314
1315            let kind = sibling.kind();
1316            if kind == "decorator" || kind == "decorated_definition" {
1317                if let Ok(text) = sibling.utf8_text(source) {
1318                    // Extract the decorator name (strip @ prefix and parameters)
1319                    let decorator_text = text.trim();
1320                    if let Some(stripped) = decorator_text.strip_prefix(prefix) {
1321                        let name = stripped.split('(').next().unwrap_or(stripped).trim();
1322                        if !name.is_empty() {
1323                            decorators.push(name.to_string());
1324                        }
1325                    }
1326                }
1327            }
1328        }
1329    }
1330
1331    // Also check for decorators as direct children
1332    let mut cursor = node.walk();
1333    for child in node.children(&mut cursor) {
1334        if child.kind() == "decorator" {
1335            if let Ok(text) = child.utf8_text(source) {
1336                let decorator_text = text.trim();
1337                if let Some(stripped) = decorator_text.strip_prefix(prefix) {
1338                    let name = stripped.split('(').next().unwrap_or(stripped).trim();
1339                    if !name.is_empty() {
1340                        decorators.push(name.to_string());
1341                    }
1342                }
1343            }
1344        }
1345    }
1346
1347    decorators
1348}
1349
1350/// Extract Rust attributes (#[...]) from a node.
1351fn extract_rust_attributes(node: &Node, source: &[u8]) -> Vec<String> {
1352    let mut attributes = Vec::new();
1353
1354    // Look for attribute nodes in the parent's children that precede this node
1355    if let Some(parent) = node.parent() {
1356        let mut cursor = parent.walk();
1357        for sibling in parent.children(&mut cursor) {
1358            // Stop when we reach our node
1359            if sibling.id() == node.id() {
1360                break;
1361            }
1362
1363            if sibling.kind() == "attribute_item" || sibling.kind() == "inner_attribute_item" {
1364                if let Ok(text) = sibling.utf8_text(source) {
1365                    // Extract attribute content (strip #[ and ])
1366                    let attr_text = text.trim();
1367                    if attr_text.starts_with("#[") && attr_text.ends_with(']') {
1368                        let inner = &attr_text[2..attr_text.len() - 1];
1369                        // Get just the attribute name (before any parameters)
1370                        let name = inner.split('(').next().unwrap_or(inner).trim();
1371                        if !name.is_empty() {
1372                            attributes.push(name.to_string());
1373                        }
1374                    }
1375                }
1376            }
1377        }
1378    }
1379
1380    // Also check direct children
1381    let mut cursor = node.walk();
1382    for child in node.children(&mut cursor) {
1383        if child.kind() == "attribute_item" || child.kind() == "inner_attribute_item" {
1384            if let Ok(text) = child.utf8_text(source) {
1385                let attr_text = text.trim();
1386                if attr_text.starts_with("#[") && attr_text.ends_with(']') {
1387                    let inner = &attr_text[2..attr_text.len() - 1];
1388                    let name = inner.split('(').next().unwrap_or(inner).trim();
1389                    if !name.is_empty() {
1390                        attributes.push(name.to_string());
1391                    }
1392                }
1393            }
1394        }
1395    }
1396
1397    attributes
1398}
1399
1400// ============================================================================
1401// Tests
1402// ============================================================================
1403
1404#[cfg(test)]
1405mod tests {
1406    use super::*;
1407
1408    #[test]
1409    fn test_language_from_extension() {
1410        assert_eq!(
1411            SupportedLanguage::from_extension("py"),
1412            Some(SupportedLanguage::Python)
1413        );
1414        assert_eq!(
1415            SupportedLanguage::from_extension("js"),
1416            Some(SupportedLanguage::JavaScript)
1417        );
1418        assert_eq!(
1419            SupportedLanguage::from_extension("ts"),
1420            Some(SupportedLanguage::TypeScript)
1421        );
1422        assert_eq!(
1423            SupportedLanguage::from_extension("tsx"),
1424            Some(SupportedLanguage::Tsx)
1425        );
1426        assert_eq!(
1427            SupportedLanguage::from_extension("rs"),
1428            Some(SupportedLanguage::Rust)
1429        );
1430        assert_eq!(
1431            SupportedLanguage::from_extension("go"),
1432            Some(SupportedLanguage::Go)
1433        );
1434        assert_eq!(
1435            SupportedLanguage::from_extension("c"),
1436            Some(SupportedLanguage::C)
1437        );
1438        assert_eq!(
1439            SupportedLanguage::from_extension("cpp"),
1440            Some(SupportedLanguage::Cpp)
1441        );
1442        assert_eq!(
1443            SupportedLanguage::from_extension("cs"),
1444            Some(SupportedLanguage::CSharp)
1445        );
1446        assert_eq!(SupportedLanguage::from_extension("unknown"), None);
1447    }
1448
1449    #[test]
1450    fn test_language_from_path() {
1451        assert_eq!(
1452            SupportedLanguage::from_path(Path::new("src/main.py")),
1453            Some(SupportedLanguage::Python)
1454        );
1455        assert_eq!(
1456            SupportedLanguage::from_path(Path::new("app.tsx")),
1457            Some(SupportedLanguage::Tsx)
1458        );
1459        assert_eq!(SupportedLanguage::from_path(Path::new("README.md")), None);
1460    }
1461
1462    #[test]
1463    fn test_language_as_str() {
1464        assert_eq!(SupportedLanguage::Python.as_str(), "python");
1465        assert_eq!(SupportedLanguage::TypeScript.as_str(), "typescript");
1466        assert_eq!(SupportedLanguage::Tsx.as_str(), "typescript");
1467        assert_eq!(SupportedLanguage::CSharp.as_str(), "csharp");
1468    }
1469
1470    #[test]
1471    fn test_parser_creation() {
1472        let parser = CodeParser::new(SupportedLanguage::Python);
1473        assert!(parser.is_ok());
1474    }
1475
1476    #[test]
1477    fn test_parse_python() {
1478        let mut parser = CodeParser::new(SupportedLanguage::Python).unwrap();
1479        let source = "def hello():\n    pass";
1480        let tree = parser.parse(source);
1481        assert!(tree.is_ok());
1482
1483        let tree = tree.unwrap();
1484        assert_eq!(tree.root_node().kind(), "module");
1485    }
1486
1487    #[test]
1488    fn test_parse_rust() {
1489        let mut parser = CodeParser::new(SupportedLanguage::Rust).unwrap();
1490        let source = "fn main() {}";
1491        let tree = parser.parse(source);
1492        assert!(tree.is_ok());
1493
1494        let tree = tree.unwrap();
1495        assert_eq!(tree.root_node().kind(), "source_file");
1496    }
1497
1498    #[test]
1499    fn test_parse_typescript() {
1500        let mut parser = CodeParser::new(SupportedLanguage::TypeScript).unwrap();
1501        let source = "function greet(name: string): void {}";
1502        let tree = parser.parse(source);
1503        assert!(tree.is_ok());
1504    }
1505
1506    #[test]
1507    fn test_query_manager_simple() {
1508        // Simple query that captures function names
1509        let query_source = r#"
1510            (function_definition
1511                name: (identifier) @name.definition.callable.function)
1512        "#;
1513
1514        let qm = QueryManager::new(SupportedLanguage::Python, query_source);
1515        assert!(qm.is_ok());
1516
1517        let qm = qm.unwrap();
1518        assert!(
1519            qm.capture_index("name.definition.callable.function")
1520                .is_some()
1521        );
1522    }
1523
1524    #[test]
1525    fn test_tag_extractor_python() {
1526        let query_source = r#"
1527            (function_definition
1528                name: (identifier) @name.definition.callable.function) @definition.callable.function
1529        "#;
1530
1531        let mut extractor = TagExtractor::new(SupportedLanguage::Python, query_source).unwrap();
1532
1533        let source = r#"
1534def hello():
1535    pass
1536
1537def world():
1538    return 42
1539"#;
1540
1541        let tags = extractor.extract(source).unwrap();
1542
1543        // Should have 4 tags: 2 function definitions + 2 name captures
1544        assert_eq!(tags.len(), 4);
1545
1546        // Check function names were captured
1547        let names: Vec<_> = tags
1548            .iter()
1549            .filter(|t| t.tag == "name.definition.callable.function")
1550            .map(|t| t.name.as_str())
1551            .collect();
1552        assert!(names.contains(&"hello"));
1553        assert!(names.contains(&"world"));
1554    }
1555
1556    #[test]
1557    fn test_tag_extractor_rust() {
1558        let query_source = r#"
1559            (function_item
1560                name: (identifier) @name.definition.callable.function) @definition.callable.function
1561        "#;
1562
1563        let mut extractor = TagExtractor::new(SupportedLanguage::Rust, query_source).unwrap();
1564
1565        let source = r#"
1566fn main() {
1567    println!("Hello");
1568}
1569
1570fn helper() -> i32 {
1571    42
1572}
1573"#;
1574
1575        let tags = extractor.extract(source).unwrap();
1576
1577        let names: Vec<_> = tags
1578            .iter()
1579            .filter(|t| t.tag == "name.definition.callable.function")
1580            .map(|t| t.name.as_str())
1581            .collect();
1582        assert!(names.contains(&"main"));
1583        assert!(names.contains(&"helper"));
1584    }
1585
1586    #[test]
1587    fn test_extracted_tag_line_numbers() {
1588        let query_source = r#"
1589            (function_definition
1590                name: (identifier) @name.definition.callable.function)
1591        "#;
1592
1593        let mut extractor = TagExtractor::new(SupportedLanguage::Python, query_source).unwrap();
1594
1595        let source = "def foo():\n    pass";
1596        let tags = extractor.extract(source).unwrap();
1597
1598        assert_eq!(tags.len(), 1);
1599        assert_eq!(tags[0].start_line, 0); // 0-indexed
1600        assert_eq!(tags[0].line_number(), 1); // 1-indexed for display
1601    }
1602
1603    #[test]
1604    fn test_parser_for_path() {
1605        let parser = CodeParser::for_path(Path::new("test.py"));
1606        assert!(parser.is_ok());
1607        assert_eq!(parser.unwrap().language(), SupportedLanguage::Python);
1608
1609        let parser = CodeParser::for_path(Path::new("test.unknown"));
1610        assert!(parser.is_err());
1611    }
1612
1613    // Containment Context Tests
1614
1615    #[test]
1616    fn test_containment_context_new() {
1617        let ctx = ContainmentContext::new();
1618        assert!(ctx.is_empty());
1619        assert_eq!(ctx.depth(), 0);
1620        assert_eq!(ctx.get_current_parent_id(), None);
1621    }
1622
1623    #[test]
1624    fn test_containment_context_push() {
1625        let mut ctx = ContainmentContext::new();
1626        ctx.push_container(
1627            "file.py:MyClass".to_string(),
1628            "Container".to_string(),
1629            10,
1630            50,
1631            "MyClass".to_string(),
1632        );
1633
1634        assert!(!ctx.is_empty());
1635        assert_eq!(ctx.depth(), 1);
1636        assert_eq!(ctx.get_current_parent_id(), Some("file.py:MyClass"));
1637        assert_eq!(ctx.get_containment_path(), vec!["MyClass"]);
1638    }
1639
1640    #[test]
1641    fn test_containment_context_nested() {
1642        let mut ctx = ContainmentContext::new();
1643
1644        // Push class
1645        ctx.push_container(
1646            "file.py:MyClass".to_string(),
1647            "Container".to_string(),
1648            10,
1649            50,
1650            "MyClass".to_string(),
1651        );
1652
1653        // Push method inside class
1654        ctx.push_container(
1655            "file.py:MyClass:method".to_string(),
1656            "Callable".to_string(),
1657            15,
1658            25,
1659            "method".to_string(),
1660        );
1661
1662        assert_eq!(ctx.depth(), 2);
1663        assert_eq!(ctx.get_current_parent_id(), Some("file.py:MyClass:method"));
1664        assert_eq!(ctx.get_containment_path(), vec!["MyClass", "method"]);
1665    }
1666
1667    #[test]
1668    fn test_containment_context_update_pops() {
1669        let mut ctx = ContainmentContext::new();
1670
1671        // Push class at lines 10-50
1672        ctx.push_container(
1673            "file.py:MyClass".to_string(),
1674            "Container".to_string(),
1675            10,
1676            50,
1677            "MyClass".to_string(),
1678        );
1679
1680        // Push method at lines 15-25
1681        ctx.push_container(
1682            "file.py:MyClass:method".to_string(),
1683            "Callable".to_string(),
1684            15,
1685            25,
1686            "method".to_string(),
1687        );
1688
1689        // Update to line 30 - should pop method but keep class
1690        ctx.update(30);
1691        assert_eq!(ctx.depth(), 1);
1692        assert_eq!(ctx.get_containment_path(), vec!["MyClass"]);
1693
1694        // Update to line 55 - should pop class too
1695        ctx.update(55);
1696        assert!(ctx.is_empty());
1697    }
1698
1699    #[test]
1700    fn test_containment_context_is_container_active() {
1701        let mut ctx = ContainmentContext::new();
1702        ctx.push_container(
1703            "file.py:MyClass".to_string(),
1704            "Container".to_string(),
1705            10,
1706            50,
1707            "MyClass".to_string(),
1708        );
1709
1710        assert!(ctx.is_container_active("file.py:MyClass"));
1711        assert!(!ctx.is_container_active("file.py:OtherClass"));
1712    }
1713
1714    #[test]
1715    fn test_containment_context_clear() {
1716        let mut ctx = ContainmentContext::new();
1717        ctx.push_container(
1718            "file.py:MyClass".to_string(),
1719            "Container".to_string(),
1720            10,
1721            50,
1722            "MyClass".to_string(),
1723        );
1724
1725        ctx.clear();
1726        assert!(ctx.is_empty());
1727    }
1728
1729    #[test]
1730    fn test_containment_context_only_containers_and_callables() {
1731        let mut ctx = ContainmentContext::new();
1732
1733        // Data nodes should not be pushed
1734        ctx.push_container(
1735            "file.py:field".to_string(),
1736            "Data".to_string(),
1737            10,
1738            10,
1739            "field".to_string(),
1740        );
1741
1742        assert!(ctx.is_empty());
1743    }
1744
1745    #[test]
1746    fn test_containment_context_display() {
1747        let mut ctx = ContainmentContext::new();
1748        assert_eq!(format!("{}", ctx), "ContainmentContext(empty)");
1749
1750        ctx.push_container(
1751            "file.py:MyClass".to_string(),
1752            "Container".to_string(),
1753            10,
1754            50,
1755            "MyClass".to_string(),
1756        );
1757        ctx.push_container(
1758            "file.py:MyClass:method".to_string(),
1759            "Callable".to_string(),
1760            15,
1761            25,
1762            "method".to_string(),
1763        );
1764
1765        let display = format!("{}", ctx);
1766        assert!(display.contains("MyClass(Container)"));
1767        assert!(display.contains("method(Callable)"));
1768    }
1769
1770    // Node ID Generation Tests
1771
1772    #[test]
1773    fn test_generate_node_id_simple() {
1774        assert_eq!(
1775            generate_node_id("src/models.py", &[], "User", None),
1776            "src/models.py:User"
1777        );
1778    }
1779
1780    #[test]
1781    fn test_generate_node_id_with_containment() {
1782        assert_eq!(
1783            generate_node_id("src/models.py", &["User"], "save", None),
1784            "src/models.py:User:save"
1785        );
1786    }
1787
1788    #[test]
1789    fn test_generate_node_id_nested_containment() {
1790        assert_eq!(
1791            generate_node_id("src/models.py", &["Module", "Class"], "method", None),
1792            "src/models.py:Module:Class:method"
1793        );
1794    }
1795
1796    #[test]
1797    fn test_generate_node_id_lambda_with_line() {
1798        assert_eq!(
1799            generate_node_id("src/utils.py", &["process"], "<lambda>", Some(42)),
1800            "src/utils.py:process:<lambda>:42"
1801        );
1802    }
1803
1804    #[test]
1805    fn test_generate_node_id_lambda_without_line() {
1806        // Lambda without line number doesn't get special treatment
1807        assert_eq!(
1808            generate_node_id("src/utils.py", &["process"], "<lambda>", None),
1809            "src/utils.py:process:<lambda>"
1810        );
1811    }
1812
1813    #[test]
1814    fn test_parse_node_id_simple() {
1815        let result = parse_node_id("src/models.py:User");
1816        assert!(result.is_some());
1817
1818        let (file, containment, name) = result.unwrap();
1819        assert_eq!(file, "src/models.py");
1820        assert!(containment.is_empty());
1821        assert_eq!(name, "User");
1822    }
1823
1824    #[test]
1825    fn test_parse_node_id_with_containment() {
1826        let result = parse_node_id("src/models.py:User:save");
1827        assert!(result.is_some());
1828
1829        let (file, containment, name) = result.unwrap();
1830        assert_eq!(file, "src/models.py");
1831        assert_eq!(containment, vec!["User"]);
1832        assert_eq!(name, "save");
1833    }
1834
1835    #[test]
1836    fn test_parse_node_id_nested() {
1837        let result = parse_node_id("src/models.py:Module:Class:method");
1838        assert!(result.is_some());
1839
1840        let (file, containment, name) = result.unwrap();
1841        assert_eq!(file, "src/models.py");
1842        assert_eq!(containment, vec!["Module", "Class"]);
1843        assert_eq!(name, "method");
1844    }
1845
1846    #[test]
1847    fn test_parse_node_id_invalid() {
1848        assert!(parse_node_id("invalid").is_none());
1849    }
1850
1851    // Metadata Extraction Tests
1852
1853    #[test]
1854    fn test_metadata_extractor_python_visibility() {
1855        let extractor = MetadataExtractor::new(SupportedLanguage::Python);
1856
1857        // Public (default)
1858        let metadata = extractor.extract_from_name("my_function");
1859        assert_eq!(metadata.visibility, Some("public".to_string()));
1860
1861        // Protected (single underscore)
1862        let metadata = extractor.extract_from_name("_internal_helper");
1863        assert_eq!(metadata.visibility, Some("protected".to_string()));
1864
1865        // Private (double underscore)
1866        let metadata = extractor.extract_from_name("__private_method");
1867        assert_eq!(metadata.visibility, Some("private".to_string()));
1868
1869        // Dunder methods are public
1870        let metadata = extractor.extract_from_name("__init__");
1871        assert_eq!(metadata.visibility, Some("public".to_string()));
1872    }
1873
1874    #[test]
1875    fn test_metadata_extractor_go_visibility() {
1876        let extractor = MetadataExtractor::new(SupportedLanguage::Go);
1877
1878        // Exported (uppercase)
1879        let metadata = extractor.extract_from_name("ExportedFunction");
1880        assert_eq!(metadata.visibility, Some("public".to_string()));
1881
1882        // Unexported (lowercase)
1883        let metadata = extractor.extract_from_name("internalHelper");
1884        assert_eq!(metadata.visibility, Some("private".to_string()));
1885    }
1886
1887    #[test]
1888    fn test_metadata_extraction_python_async() {
1889        let mut parser = CodeParser::new(SupportedLanguage::Python).unwrap();
1890        let source = "async def fetch_data():\n    pass";
1891        let tree = parser.parse(source).unwrap();
1892        let root = tree.root_node();
1893
1894        let extractor = MetadataExtractor::new(SupportedLanguage::Python);
1895
1896        // Find the function definition node
1897        let func_node = root.child(0).unwrap();
1898        let metadata = extractor.extract(&func_node, source.as_bytes());
1899
1900        assert_eq!(metadata.is_async, Some(true));
1901    }
1902
1903    #[test]
1904    fn test_metadata_extraction_rust_pub() {
1905        let mut parser = CodeParser::new(SupportedLanguage::Rust).unwrap();
1906        let source = "pub fn public_function() {}";
1907        let tree = parser.parse(source).unwrap();
1908        let root = tree.root_node();
1909
1910        let extractor = MetadataExtractor::new(SupportedLanguage::Rust);
1911        let func_node = root.child(0).unwrap();
1912        let metadata = extractor.extract(&func_node, source.as_bytes());
1913
1914        assert_eq!(metadata.visibility, Some("public".to_string()));
1915    }
1916
1917    #[test]
1918    fn test_metadata_extraction_rust_private() {
1919        let mut parser = CodeParser::new(SupportedLanguage::Rust).unwrap();
1920        let source = "fn private_function() {}";
1921        let tree = parser.parse(source).unwrap();
1922        let root = tree.root_node();
1923
1924        let extractor = MetadataExtractor::new(SupportedLanguage::Rust);
1925        let func_node = root.child(0).unwrap();
1926        let metadata = extractor.extract(&func_node, source.as_bytes());
1927
1928        assert_eq!(metadata.visibility, Some("private".to_string()));
1929    }
1930
1931    #[test]
1932    fn test_metadata_extraction_rust_async() {
1933        let mut parser = CodeParser::new(SupportedLanguage::Rust).unwrap();
1934        let source = "async fn async_function() {}";
1935        let tree = parser.parse(source).unwrap();
1936        let root = tree.root_node();
1937
1938        let extractor = MetadataExtractor::new(SupportedLanguage::Rust);
1939        let func_node = root.child(0).unwrap();
1940        let metadata = extractor.extract(&func_node, source.as_bytes());
1941
1942        assert_eq!(metadata.is_async, Some(true));
1943    }
1944
1945    #[test]
1946    fn test_metadata_extraction_rust_modifiers() {
1947        let mut parser = CodeParser::new(SupportedLanguage::Rust).unwrap();
1948        let source = "pub const MY_CONST: i32 = 42;";
1949        let tree = parser.parse(source).unwrap();
1950        let root = tree.root_node();
1951
1952        let extractor = MetadataExtractor::new(SupportedLanguage::Rust);
1953        let const_node = root.child(0).unwrap();
1954        let metadata = extractor.extract(&const_node, source.as_bytes());
1955
1956        assert_eq!(metadata.visibility, Some("public".to_string()));
1957        assert!(
1958            metadata
1959                .modifiers
1960                .as_ref()
1961                .is_some_and(|m| m.contains(&"const".to_string()))
1962        );
1963    }
1964
1965    #[test]
1966    fn test_metadata_extraction_rust_unsafe() {
1967        let mut parser = CodeParser::new(SupportedLanguage::Rust).unwrap();
1968        let source = "unsafe fn dangerous() {}";
1969        let tree = parser.parse(source).unwrap();
1970        let root = tree.root_node();
1971
1972        let extractor = MetadataExtractor::new(SupportedLanguage::Rust);
1973        let func_node = root.child(0).unwrap();
1974        let metadata = extractor.extract(&func_node, source.as_bytes());
1975
1976        assert!(
1977            metadata
1978                .modifiers
1979                .as_ref()
1980                .is_some_and(|m| m.contains(&"unsafe".to_string()))
1981        );
1982    }
1983
1984    #[test]
1985    fn test_metadata_extraction_typescript_static() {
1986        let mut parser = CodeParser::new(SupportedLanguage::TypeScript).unwrap();
1987        let source = "class Foo { static bar() {} }";
1988        let tree = parser.parse(source).unwrap();
1989        let root = tree.root_node();
1990
1991        let extractor = MetadataExtractor::new(SupportedLanguage::TypeScript);
1992
1993        // Navigate to the method definition: class_declaration -> class_body -> method_definition
1994        let class_node = root.child(0).unwrap();
1995        let class_body = class_node.child_by_field_name("body").unwrap();
1996
1997        // Find method_definition in class_body
1998        let mut cursor = class_body.walk();
1999        let method_node = class_body
2000            .children(&mut cursor)
2001            .find(|n| n.kind() == "method_definition")
2002            .unwrap();
2003
2004        let metadata = extractor.extract(&method_node, source.as_bytes());
2005        assert_eq!(metadata.is_static, Some(true));
2006    }
2007
2008    #[test]
2009    fn test_metadata_extraction_csharp_abstract() {
2010        let mut parser = CodeParser::new(SupportedLanguage::CSharp).unwrap();
2011        let source = "public abstract class MyClass {}";
2012        let tree = parser.parse(source).unwrap();
2013        let root = tree.root_node();
2014
2015        let extractor = MetadataExtractor::new(SupportedLanguage::CSharp);
2016        let class_node = root.child(0).unwrap();
2017        let metadata = extractor.extract(&class_node, source.as_bytes());
2018
2019        assert_eq!(metadata.visibility, Some("public".to_string()));
2020        assert_eq!(metadata.is_abstract, Some(true));
2021    }
2022
2023    #[test]
2024    fn test_metadata_extraction_csharp_virtual() {
2025        let mut parser = CodeParser::new(SupportedLanguage::CSharp).unwrap();
2026        let source = "public virtual void DoSomething() {}";
2027        let tree = parser.parse(source).unwrap();
2028        let root = tree.root_node();
2029
2030        let extractor = MetadataExtractor::new(SupportedLanguage::CSharp);
2031        let method_node = root.child(0).unwrap();
2032        let metadata = extractor.extract(&method_node, source.as_bytes());
2033
2034        assert_eq!(metadata.visibility, Some("public".to_string()));
2035        assert_eq!(metadata.is_virtual, Some(true));
2036    }
2037
2038    #[test]
2039    fn test_metadata_extraction_cpp_static() {
2040        let mut parser = CodeParser::new(SupportedLanguage::Cpp).unwrap();
2041        let source = "static int count = 0;";
2042        let tree = parser.parse(source).unwrap();
2043        let root = tree.root_node();
2044
2045        let extractor = MetadataExtractor::new(SupportedLanguage::Cpp);
2046        let decl_node = root.child(0).unwrap();
2047        let metadata = extractor.extract(&decl_node, source.as_bytes());
2048
2049        assert_eq!(metadata.is_static, Some(true));
2050    }
2051
2052    #[test]
2053    fn test_metadata_extraction_cpp_virtual() {
2054        let mut parser = CodeParser::new(SupportedLanguage::Cpp).unwrap();
2055        let source = "virtual void update() {}";
2056        let tree = parser.parse(source).unwrap();
2057        let root = tree.root_node();
2058
2059        let extractor = MetadataExtractor::new(SupportedLanguage::Cpp);
2060        let func_node = root.child(0).unwrap();
2061        let metadata = extractor.extract(&func_node, source.as_bytes());
2062
2063        assert_eq!(metadata.is_virtual, Some(true));
2064    }
2065
2066    #[test]
2067    fn test_metadata_extraction_cpp_modifiers() {
2068        let mut parser = CodeParser::new(SupportedLanguage::Cpp).unwrap();
2069        let source = "inline constexpr int square(int x) { return x * x; }";
2070        let tree = parser.parse(source).unwrap();
2071        let root = tree.root_node();
2072
2073        let extractor = MetadataExtractor::new(SupportedLanguage::Cpp);
2074        let func_node = root.child(0).unwrap();
2075        let metadata = extractor.extract(&func_node, source.as_bytes());
2076
2077        let modifiers = metadata.modifiers.unwrap();
2078        assert!(modifiers.contains(&"inline".to_string()));
2079        assert!(modifiers.contains(&"constexpr".to_string()));
2080    }
2081
2082    #[test]
2083    fn test_metadata_extraction_go() {
2084        let mut parser = CodeParser::new(SupportedLanguage::Go).unwrap();
2085        let source = "func ExportedFunc() {}";
2086        let tree = parser.parse(source).unwrap();
2087        let root = tree.root_node();
2088
2089        let extractor = MetadataExtractor::new(SupportedLanguage::Go);
2090        let func_node = root.child(0).unwrap();
2091        let metadata = extractor.extract(&func_node, source.as_bytes());
2092
2093        assert_eq!(metadata.visibility, Some("public".to_string()));
2094    }
2095
2096    #[test]
2097    fn test_metadata_extraction_go_unexported() {
2098        let mut parser = CodeParser::new(SupportedLanguage::Go).unwrap();
2099        let source = "func internalFunc() {}";
2100        let tree = parser.parse(source).unwrap();
2101        let root = tree.root_node();
2102
2103        let extractor = MetadataExtractor::new(SupportedLanguage::Go);
2104        let func_node = root.child(0).unwrap();
2105        let metadata = extractor.extract(&func_node, source.as_bytes());
2106
2107        assert_eq!(metadata.visibility, Some("private".to_string()));
2108    }
2109
2110    // ========================================================================
2111    // ManifestLanguage Tests
2112    // ========================================================================
2113
2114    #[test]
2115    fn test_manifest_language_from_filename_json() {
2116        assert_eq!(
2117            ManifestLanguage::from_filename("package.json"),
2118            Some(ManifestLanguage::Json)
2119        );
2120        assert_eq!(
2121            ManifestLanguage::from_filename("vcpkg.json"),
2122            Some(ManifestLanguage::Json)
2123        );
2124    }
2125
2126    #[test]
2127    fn test_manifest_language_from_filename_toml() {
2128        assert_eq!(
2129            ManifestLanguage::from_filename("Cargo.toml"),
2130            Some(ManifestLanguage::Toml)
2131        );
2132        assert_eq!(
2133            ManifestLanguage::from_filename("pyproject.toml"),
2134            Some(ManifestLanguage::Toml)
2135        );
2136    }
2137
2138    #[test]
2139    fn test_manifest_language_from_filename_gomod() {
2140        assert_eq!(
2141            ManifestLanguage::from_filename("go.mod"),
2142            Some(ManifestLanguage::GoMod)
2143        );
2144    }
2145
2146    #[test]
2147    fn test_manifest_language_from_filename_xml() {
2148        assert_eq!(
2149            ManifestLanguage::from_filename("MyProject.csproj"),
2150            Some(ManifestLanguage::Xml)
2151        );
2152        assert_eq!(
2153            ManifestLanguage::from_filename("Legacy.vbproj"),
2154            Some(ManifestLanguage::Xml)
2155        );
2156        assert_eq!(
2157            ManifestLanguage::from_filename("Functional.fsproj"),
2158            Some(ManifestLanguage::Xml)
2159        );
2160    }
2161
2162    #[test]
2163    fn test_manifest_language_from_filename_cmake() {
2164        assert_eq!(
2165            ManifestLanguage::from_filename("CMakeLists.txt"),
2166            Some(ManifestLanguage::CMake)
2167        );
2168    }
2169
2170    #[test]
2171    fn test_manifest_language_from_filename_not_recognized() {
2172        assert_eq!(ManifestLanguage::from_filename("README.md"), None);
2173        assert_eq!(ManifestLanguage::from_filename("main.rs"), None);
2174        assert_eq!(ManifestLanguage::from_filename("config.json"), None);
2175        assert_eq!(ManifestLanguage::from_filename("settings.toml"), None);
2176    }
2177
2178    #[test]
2179    fn test_manifest_language_from_path() {
2180        assert_eq!(
2181            ManifestLanguage::from_path(Path::new("packages/core/package.json")),
2182            Some(ManifestLanguage::Json)
2183        );
2184        assert_eq!(
2185            ManifestLanguage::from_path(Path::new("crates/codeprysm-core/Cargo.toml")),
2186            Some(ManifestLanguage::Toml)
2187        );
2188        assert_eq!(
2189            ManifestLanguage::from_path(Path::new("src/MyProject.csproj")),
2190            Some(ManifestLanguage::Xml)
2191        );
2192        assert_eq!(ManifestLanguage::from_path(Path::new("src/main.rs")), None);
2193    }
2194
2195    #[test]
2196    fn test_manifest_language_is_manifest_file() {
2197        assert!(ManifestLanguage::is_manifest_file(Path::new(
2198            "package.json"
2199        )));
2200        assert!(ManifestLanguage::is_manifest_file(Path::new("Cargo.toml")));
2201        assert!(ManifestLanguage::is_manifest_file(Path::new("go.mod")));
2202        assert!(ManifestLanguage::is_manifest_file(Path::new(
2203            "CMakeLists.txt"
2204        )));
2205        assert!(ManifestLanguage::is_manifest_file(Path::new(
2206            "MyApp.csproj"
2207        )));
2208        assert!(!ManifestLanguage::is_manifest_file(Path::new("main.py")));
2209        assert!(!ManifestLanguage::is_manifest_file(Path::new("README.md")));
2210    }
2211
2212    #[test]
2213    fn test_manifest_language_as_str() {
2214        assert_eq!(ManifestLanguage::Json.as_str(), "json");
2215        assert_eq!(ManifestLanguage::Toml.as_str(), "toml");
2216        assert_eq!(ManifestLanguage::GoMod.as_str(), "gomod");
2217        assert_eq!(ManifestLanguage::Xml.as_str(), "xml");
2218        assert_eq!(ManifestLanguage::CMake.as_str(), "cmake");
2219    }
2220
2221    #[test]
2222    fn test_manifest_language_display() {
2223        assert_eq!(format!("{}", ManifestLanguage::Json), "json");
2224        assert_eq!(format!("{}", ManifestLanguage::Toml), "toml");
2225        assert_eq!(format!("{}", ManifestLanguage::GoMod), "gomod");
2226        assert_eq!(format!("{}", ManifestLanguage::Xml), "xml");
2227        assert_eq!(format!("{}", ManifestLanguage::CMake), "cmake");
2228    }
2229
2230    #[test]
2231    fn test_manifest_language_tree_sitter_language() {
2232        // Verify each manifest language can create a working parser
2233        for lang in [
2234            ManifestLanguage::Json,
2235            ManifestLanguage::Toml,
2236            ManifestLanguage::GoMod,
2237            ManifestLanguage::Xml,
2238            ManifestLanguage::CMake,
2239        ] {
2240            let mut parser = Parser::new();
2241            let result = parser.set_language(&lang.tree_sitter_language());
2242            assert!(
2243                result.is_ok(),
2244                "Failed to set tree-sitter language for {:?}",
2245                lang
2246            );
2247        }
2248    }
2249
2250    #[test]
2251    fn test_manifest_language_parse_json() {
2252        let mut parser = Parser::new();
2253        parser
2254            .set_language(&ManifestLanguage::Json.tree_sitter_language())
2255            .unwrap();
2256
2257        let source = r#"{"name": "my-package", "version": "1.0.0"}"#;
2258        let tree = parser.parse(source, None).unwrap();
2259        assert!(!tree.root_node().has_error());
2260    }
2261
2262    #[test]
2263    fn test_manifest_language_parse_toml() {
2264        let mut parser = Parser::new();
2265        parser
2266            .set_language(&ManifestLanguage::Toml.tree_sitter_language())
2267            .unwrap();
2268
2269        let source = r#"[package]
2270name = "my-crate"
2271version = "0.1.0"
2272"#;
2273        let tree = parser.parse(source, None).unwrap();
2274        assert!(!tree.root_node().has_error());
2275    }
2276
2277    #[test]
2278    fn test_manifest_language_parse_gomod() {
2279        let mut parser = Parser::new();
2280        parser
2281            .set_language(&ManifestLanguage::GoMod.tree_sitter_language())
2282            .unwrap();
2283
2284        let source = r#"module github.com/example/mymodule
2285
2286go 1.21
2287"#;
2288        let tree = parser.parse(source, None).unwrap();
2289        assert!(!tree.root_node().has_error());
2290    }
2291
2292    #[test]
2293    fn test_manifest_language_parse_xml() {
2294        let mut parser = Parser::new();
2295        parser
2296            .set_language(&ManifestLanguage::Xml.tree_sitter_language())
2297            .unwrap();
2298
2299        let source = r#"<Project Sdk="Microsoft.NET.Sdk">
2300  <PropertyGroup>
2301    <AssemblyName>MyProject</AssemblyName>
2302  </PropertyGroup>
2303</Project>"#;
2304        let tree = parser.parse(source, None).unwrap();
2305        assert!(!tree.root_node().has_error());
2306    }
2307
2308    #[test]
2309    fn test_manifest_language_parse_cmake() {
2310        let mut parser = Parser::new();
2311        parser
2312            .set_language(&ManifestLanguage::CMake.tree_sitter_language())
2313            .unwrap();
2314
2315        let source = r#"cmake_minimum_required(VERSION 3.20)
2316project(my-project VERSION 1.0.0)
2317"#;
2318        let tree = parser.parse(source, None).unwrap();
2319        assert!(!tree.root_node().has_error());
2320    }
2321
2322    #[test]
2323    fn test_manifest_language_exact_filenames() {
2324        let filenames = ManifestLanguage::exact_filenames();
2325        assert!(filenames.contains(&"package.json"));
2326        assert!(filenames.contains(&"Cargo.toml"));
2327        assert!(filenames.contains(&"go.mod"));
2328        assert!(filenames.contains(&"CMakeLists.txt"));
2329    }
2330
2331    #[test]
2332    fn test_manifest_language_extensions() {
2333        let extensions = ManifestLanguage::manifest_extensions();
2334        assert!(extensions.contains(&"csproj"));
2335        assert!(extensions.contains(&"vbproj"));
2336        assert!(extensions.contains(&"fsproj"));
2337    }
2338}