Skip to main content

ast_doc_core/parser/
mod.rs

1//! Phase 2: AST parsing and strategy extraction.
2//!
3//! Uses tree-sitter to parse source files and pre-compute
4//! Full/NoTests/Summary strategy variants with token counts.
5
6pub mod lang;
7pub mod strategy;
8
9use std::{
10    collections::HashMap,
11    path::{Path, PathBuf},
12};
13
14use crate::{config::OutputStrategy, error::AstDocError, ingestion::DiscoveredFile};
15
/// Source languages the parser can recognise.
///
/// Each variant lists the file extensions that [`detect_language`] maps to it.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Language {
    /// Rust (`.rs`).
    Rust,
    /// Python (`.py`).
    Python,
    /// TypeScript and JavaScript (`.ts`, `.tsx`, `.js`, `.jsx`).
    TypeScript,
    /// Go (`.go`).
    Go,
    /// C sources and headers (`.c`, `.h`).
    C,
}
30
31impl std::fmt::Display for Language {
32    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33        match self {
34            Self::Rust => write!(f, "Rust"),
35            Self::Python => write!(f, "Python"),
36            Self::TypeScript => write!(f, "TypeScript"),
37            Self::Go => write!(f, "Go"),
38            Self::C => write!(f, "C"),
39        }
40    }
41}
42
/// Rendered output for one strategy, paired with its token count.
#[derive(Clone, Debug)]
pub struct StrategyData {
    /// Source text as rendered under this strategy.
    pub content: String,
    /// Number of tokens in `content` (computed once via tiktoken-rs during parsing).
    pub token_count: usize,
}
51
52/// A parsed file with pre-computed strategy data for all output modes.
53#[derive(Debug, Clone)]
54pub struct ParsedFile {
55    /// Relative path from the project root.
56    pub path: PathBuf,
57    /// Detected language.
58    pub language: Language,
59    /// Original source content.
60    pub source: String,
61    /// Pre-computed strategy data for each output mode.
62    pub strategies_data: HashMap<OutputStrategy, StrategyData>,
63}
64
65/// Trait for language-specific parsers.
66pub trait LanguageParser {
67    /// Parse the source code and produce a `ParsedFile`.
68    ///
69    /// # Errors
70    ///
71    /// Returns an error if tree-sitter parsing fails.
72    fn parse(&self, source: &str, path: &Path) -> Result<ParsedFile, AstDocError>;
73}
74
75/// Detect the language from a file extension.
76#[must_use]
77pub fn detect_language(path: &Path) -> Option<Language> {
78    match path.extension().and_then(|e| e.to_str()) {
79        Some("rs") => Some(Language::Rust),
80        Some("py") => Some(Language::Python),
81        Some("ts" | "tsx" | "js" | "jsx") => Some(Language::TypeScript),
82        Some("go") => Some(Language::Go),
83        Some("c" | "h") => Some(Language::C),
84        _ => None,
85    }
86}
87
88/// Parse a discovered file into a `ParsedFile`.
89///
90/// Dispatches to the appropriate language parser based on the detected language.
91///
92/// # Errors
93///
94/// Returns an error if the language feature is not enabled or parsing fails.
95pub fn parse_file(file: &DiscoveredFile, lang: Language) -> Result<ParsedFile, AstDocError> {
96    match lang {
97        #[cfg(feature = "lang-rust")]
98        Language::Rust => lang::rust_parser::RustParser::new().parse(&file.content, &file.path),
99        #[cfg(feature = "lang-python")]
100        Language::Python => {
101            lang::python_parser::PythonParser::new().parse(&file.content, &file.path)
102        }
103        #[cfg(feature = "lang-typescript")]
104        Language::TypeScript => {
105            lang::typescript_parser::TypeScriptParser::new().parse(&file.content, &file.path)
106        }
107        #[cfg(feature = "lang-go")]
108        Language::Go => lang::go_parser::GoParser::new().parse(&file.content, &file.path),
109        #[cfg(feature = "lang-c")]
110        Language::C => lang::c_parser::CParser::new().parse(&file.content, &file.path),
111        #[cfg(not(all(
112            feature = "lang-rust",
113            feature = "lang-python",
114            feature = "lang-typescript",
115            feature = "lang-go",
116            feature = "lang-c"
117        )))]
118        _ => Err(AstDocError::UnsupportedLanguage { language: lang.to_string() }),
119    }
120}
121
/// Unit tests for extension-based language detection and parser dispatch.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_detect_language_rust() {
        assert_eq!(detect_language(Path::new("main.rs")), Some(Language::Rust));
    }

    #[test]
    fn test_detect_language_python() {
        assert_eq!(detect_language(Path::new("app.py")), Some(Language::Python));
    }

    #[test]
    fn test_detect_language_typescript() {
        // All four extensions accepted by `detect_language` map to TypeScript.
        assert_eq!(detect_language(Path::new("index.ts")), Some(Language::TypeScript));
        assert_eq!(detect_language(Path::new("app.tsx")), Some(Language::TypeScript));
        assert_eq!(detect_language(Path::new("script.js")), Some(Language::TypeScript));
        assert_eq!(detect_language(Path::new("view.jsx")), Some(Language::TypeScript));
    }

    #[test]
    fn test_detect_language_go() {
        assert_eq!(detect_language(Path::new("main.go")), Some(Language::Go));
    }

    #[test]
    fn test_detect_language_c() {
        assert_eq!(detect_language(Path::new("main.c")), Some(Language::C));
        assert_eq!(detect_language(Path::new("header.h")), Some(Language::C));
    }

    #[test]
    fn test_detect_language_unknown() {
        assert_eq!(detect_language(Path::new("readme.md")), None);
        assert_eq!(detect_language(Path::new("data.json")), None);
        // A path without any extension must not be classified.
        assert_eq!(detect_language(Path::new("Makefile")), None);
    }

    // The lint expectation lives on this function rather than on the module:
    // this is the only test that calls `unwrap()`, and it is compiled out when
    // `lang-rust` is disabled, which would leave a module-level expectation
    // unfulfilled.
    #[cfg(feature = "lang-rust")]
    #[test]
    #[expect(clippy::unwrap_used)]
    fn test_parse_file_rust() {
        let file = DiscoveredFile {
            path: PathBuf::from("src/main.rs"),
            content: "fn main() {\n    println!(\"hello\");\n}\n".to_string(),
            language: Some(Language::Rust),
            raw_token_count: 10,
        };
        let result = parse_file(&file, Language::Rust).unwrap();
        assert_eq!(result.language, Language::Rust);
        assert!(result.strategies_data.contains_key(&OutputStrategy::Full));
        assert!(result.strategies_data.contains_key(&OutputStrategy::NoTests));
        assert!(result.strategies_data.contains_key(&OutputStrategy::Summary));
    }
}