acp/bridge/
detector.rs

1//! @acp:module "Format Detector"
2//! @acp:summary "RFC-0006: Auto-detects documentation format from content"
3//! @acp:domain cli
4//! @acp:layer service
5
6use super::config::{BridgeConfig, DocstringStyle};
7use crate::cache::SourceFormat;
8use regex::Regex;
9
10/// @acp:summary "Detects documentation format from content"
11pub struct FormatDetector {
12    config: BridgeConfig,
13    // Compiled regexes for detection
14    numpy_pattern: Regex,
15    sphinx_pattern: Regex,
16    google_pattern: Regex,
17    jsdoc_pattern: Regex,
18    rustdoc_section_pattern: Regex,
19}
20
21impl FormatDetector {
22    /// @acp:summary "Create a new format detector with configuration"
23    pub fn new(config: &BridgeConfig) -> Self {
24        Self {
25            config: config.clone(),
26            // NumPy: Section headers with underlines
27            numpy_pattern: Regex::new(r"(?m)^\s*(Parameters|Returns|Raises|Yields|Examples?|Notes?|Attributes?)\s*\n\s*-{3,}").unwrap(),
28            // Sphinx: :param:, :returns:, :raises: tags
29            sphinx_pattern: Regex::new(r":(param|returns?|raises?|type|rtype)\s+").unwrap(),
30            // Google: Args:, Returns:, Raises: sections
31            google_pattern: Regex::new(r"(?m)^\s*(Args|Arguments|Parameters|Returns|Raises|Yields|Examples?|Attributes?):\s*$").unwrap(),
32            // JSDoc: @param, @returns, etc.
33            jsdoc_pattern: Regex::new(r"@(param|returns?|throws?|deprecated|example|see)\b").unwrap(),
34            // Rustdoc: # Arguments, # Returns, etc.
35            rustdoc_section_pattern: Regex::new(r"(?m)^#\s*(Arguments?|Returns?|Panics?|Errors?|Examples?|Safety)\s*$").unwrap(),
36        }
37    }
38
39    /// @acp:summary "Detect documentation format from content and language"
40    pub fn detect(&self, content: &str, language: &str) -> Option<SourceFormat> {
41        if !self.config.enabled {
42            return None;
43        }
44
45        match language.to_lowercase().as_str() {
46            "javascript" | "typescript" | "js" | "ts" => {
47                if self.config.jsdoc.enabled {
48                    self.detect_jsdoc(content)
49                } else {
50                    None
51                }
52            }
53            "python" | "py" => {
54                if self.config.python.enabled {
55                    self.detect_python_docstring(content)
56                } else {
57                    None
58                }
59            }
60            "rust" | "rs" => {
61                if self.config.rust.enabled {
62                    self.detect_rustdoc(content)
63                } else {
64                    None
65                }
66            }
67            "java" | "kotlin" => Some(SourceFormat::Javadoc),
68            "go" => Some(SourceFormat::Godoc),
69            _ => None,
70        }
71    }
72
73    /// @acp:summary "Detect JSDoc format"
74    fn detect_jsdoc(&self, content: &str) -> Option<SourceFormat> {
75        if self.jsdoc_pattern.is_match(content) {
76            Some(SourceFormat::Jsdoc)
77        } else {
78            None
79        }
80    }
81
82    /// @acp:summary "Detect Python docstring style"
83    pub fn detect_python_docstring(&self, content: &str) -> Option<SourceFormat> {
84        // Check explicit configuration first
85        match self.config.python.docstring_style {
86            DocstringStyle::Google => return Some(SourceFormat::DocstringGoogle),
87            DocstringStyle::Numpy => return Some(SourceFormat::DocstringNumpy),
88            DocstringStyle::Sphinx => return Some(SourceFormat::DocstringSphinx),
89            DocstringStyle::Auto => {} // Continue to auto-detection
90        }
91
92        // Auto-detect from content patterns
93        self.auto_detect_docstring_style(content)
94    }
95
96    /// @acp:summary "Auto-detect docstring style from content patterns"
97    fn auto_detect_docstring_style(&self, content: &str) -> Option<SourceFormat> {
98        // Priority order: NumPy (most distinctive), Sphinx, Google
99
100        // NumPy: Section headers with underlines (most distinctive)
101        if self.numpy_pattern.is_match(content) {
102            return Some(SourceFormat::DocstringNumpy);
103        }
104
105        // Sphinx: :param:, :returns:, :raises: tags
106        if self.sphinx_pattern.is_match(content) {
107            return Some(SourceFormat::DocstringSphinx);
108        }
109
110        // Google: Args:, Returns:, Raises: sections
111        if self.google_pattern.is_match(content) {
112            return Some(SourceFormat::DocstringGoogle);
113        }
114
115        // No recognizable format found - might be plain docstring
116        None
117    }
118
119    /// @acp:summary "Detect Rust doc format"
120    fn detect_rustdoc(&self, content: &str) -> Option<SourceFormat> {
121        // Rust doc comments with sections
122        if self.rustdoc_section_pattern.is_match(content) {
123            return Some(SourceFormat::Rustdoc);
124        }
125
126        // Any /// or //! comment is considered rustdoc
127        if content.contains("///") || content.contains("//!") {
128            return Some(SourceFormat::Rustdoc);
129        }
130
131        None
132    }
133
134    /// @acp:summary "Check if content has any documentation comments"
135    pub fn has_documentation(&self, content: &str, language: &str) -> bool {
136        match language.to_lowercase().as_str() {
137            "javascript" | "typescript" | "js" | "ts" => {
138                content.contains("/**")
139                    || content.contains("@param")
140                    || content.contains("@returns")
141            }
142            "python" | "py" => content.contains("\"\"\"") || content.contains("'''"),
143            "rust" | "rs" => content.contains("///") || content.contains("//!"),
144            "java" | "kotlin" => content.contains("/**"),
145            "go" => {
146                // Go doc comments are // directly before declaration
147                content.lines().any(|line| {
148                    let trimmed = line.trim();
149                    trimmed.starts_with("//") && !trimmed.starts_with("// +build")
150                })
151            }
152            _ => false,
153        }
154    }
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a detector from a fully enabled bridge configuration.
    fn enabled_detector() -> FormatDetector {
        FormatDetector::new(&BridgeConfig::enabled())
    }

    #[test]
    fn test_detect_jsdoc() {
        let source = r#"
            /**
             * @param {string} name - The name
             * @returns {User} The user
             */
        "#;

        let detected = enabled_detector().detect(source, "typescript");
        assert_eq!(detected, Some(SourceFormat::Jsdoc));
    }

    #[test]
    fn test_detect_google_docstring() {
        let source = r#"
            """Search for users.

            Args:
                query: Search query string.
                limit: Maximum results.

            Returns:
                List of matching users.
            """
        "#;

        let detected = enabled_detector().detect(source, "python");
        assert_eq!(detected, Some(SourceFormat::DocstringGoogle));
    }

    #[test]
    fn test_detect_numpy_docstring() {
        let source = r#"
            """Search for users.

            Parameters
            ----------
            query : str
                Search query string.
            limit : int, optional
                Maximum results.

            Returns
            -------
            list
                List of matching users.
            """
        "#;

        let detected = enabled_detector().detect(source, "python");
        assert_eq!(detected, Some(SourceFormat::DocstringNumpy));
    }

    #[test]
    fn test_detect_sphinx_docstring() {
        let source = r#"
            """Search for users.

            :param query: Search query string.
            :type query: str
            :param limit: Maximum results.
            :returns: List of matching users.
            :rtype: list
            """
        "#;

        let detected = enabled_detector().detect(source, "python");
        assert_eq!(detected, Some(SourceFormat::DocstringSphinx));
    }

    #[test]
    fn test_detect_rustdoc() {
        let source = r#"
            /// Search for users in the database.
            ///
            /// # Arguments
            ///
            /// * `query` - Search query string
            /// * `limit` - Maximum results
            ///
            /// # Returns
            ///
            /// A vector of matching users.
        "#;

        let detected = enabled_detector().detect(source, "rust");
        assert_eq!(detected, Some(SourceFormat::Rustdoc));
    }

    #[test]
    fn test_detect_disabled() {
        // `BridgeConfig::new()` is expected to be disabled by default, so
        // even obvious JSDoc content must not be detected.
        let detector = FormatDetector::new(&BridgeConfig::new());

        let detected = detector.detect("@param {string} name - The name", "typescript");
        assert_eq!(detected, None);
    }

    #[test]
    fn test_detect_language_disabled() {
        // The bridge is on overall, but Python is switched off.
        let mut config = BridgeConfig::enabled();
        config.python.enabled = false;
        let detector = FormatDetector::new(&config);

        let detected = detector.detect("Args:\n    query: Search query.", "python");
        assert_eq!(detected, None);
    }

    #[test]
    fn test_has_documentation() {
        let detector = enabled_detector();

        // Inputs that carry a documentation marker for their language.
        assert!(detector.has_documentation("/** @param x */", "typescript"));
        assert!(detector.has_documentation("'''docstring'''", "python"));
        assert!(detector.has_documentation("/// doc comment", "rust"));

        // A plain line comment is not documentation for TypeScript.
        assert!(!detector.has_documentation("// regular comment", "typescript"));
    }
}