Skip to main content

panproto_parse/
scope_detector.rs

//! Grammar-driven named-scope detection via tree-sitter `tags.scm` queries.
//!
//! Most tree-sitter grammars ship a `queries/tags.scm` file, consumed by
//! GitHub code navigation, Helix, and the `tree-sitter tags` CLI. The capture
//! vocabulary is standardized:
//!
//! - `@definition.function` / `@definition.method` / `@definition.class` /
//!   `@definition.module` / `@definition.interface` / `@definition.type` /
//!   `@definition.macro` (and more): the scope node
//! - `@name`: the identifier within the scope node
//!
//! This module wraps `tree-sitter-tags` to produce a uniform [`NamedScope`]
//! view of source code across all 248 supported languages. The walker uses
//! the resulting scope map to drive named-scope detection without any
//! hardcoded node-kind lists. Grammars without a tags query are still
//! accepted: their detector simply yields no scopes.
16
17use std::ops::Range;
18
19use tree_sitter::Language;
20use tree_sitter_tags::{TagsConfiguration, TagsContext};
21
22use crate::error::ParseError;
23
24/// A named scope discovered by the tags query.
25///
26/// Represents whatever the grammar's `tags.scm` labels with an
27/// `@definition.*` capture paired with `@name`: functions, classes,
28/// methods, modules, types, interfaces, macros, or custom definitions.
29#[derive(Debug, Clone)]
30pub struct NamedScope {
31    /// Byte range of the scope node (e.g. the whole `fn foo() { ... }`).
32    pub node_range: Range<usize>,
33    /// Byte range of the name identifier inside the scope.
34    pub name_range: Range<usize>,
35    /// The identifier text (e.g. `"foo"`), resolved from `name_range`.
36    pub name: String,
37    /// Grammar-declared kind: the `@definition.X` capture suffix
38    /// (`"function"`, `"method"`, `"class"`, `"module"`, `"interface"`,
39    /// `"type"`, `"macro"`, or any custom suffix the grammar defines).
40    pub kind: ScopeKind,
41}
42
/// Grammar-declared scope kind, parsed from the `@definition.*` capture.
///
/// Named variants cover the standard tree-sitter tags vocabulary; [`Other`]
/// holds any additional suffix a grammar defines.
///
/// The type is hashable so it can key a `HashMap`/`HashSet` (for example to
/// group scopes by kind), and its [`Display`](std::fmt::Display) output is
/// the same text returned by [`ScopeKind::as_suffix`].
///
/// [`Other`]: ScopeKind::Other
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ScopeKind {
    /// `@definition.function`
    Function,
    /// `@definition.method`
    Method,
    /// `@definition.class`
    Class,
    /// `@definition.module`
    Module,
    /// `@definition.interface`
    Interface,
    /// `@definition.type`
    Type,
    /// `@definition.macro`
    Macro,
    /// Any other `@definition.X` suffix the grammar defines.
    Other(String),
}

impl ScopeKind {
    /// Construct from the `@definition.X` capture suffix.
    ///
    /// The seven standard suffixes map to their dedicated variants (the
    /// comparison is exact and case-sensitive); every other string,
    /// including the empty string, is preserved verbatim in
    /// [`ScopeKind::Other`].
    #[must_use]
    pub fn from_suffix(s: &str) -> Self {
        match s {
            "function" => Self::Function,
            "method" => Self::Method,
            "class" => Self::Class,
            "module" => Self::Module,
            "interface" => Self::Interface,
            "type" => Self::Type,
            "macro" => Self::Macro,
            other => Self::Other(other.to_owned()),
        }
    }

    /// The canonical capture suffix for this kind.
    ///
    /// For the named variants this is the inverse of
    /// [`ScopeKind::from_suffix`]; for [`ScopeKind::Other`] the stored
    /// string is returned unchanged.
    #[must_use]
    pub fn as_suffix(&self) -> &str {
        match self {
            Self::Function => "function",
            Self::Method => "method",
            Self::Class => "class",
            Self::Module => "module",
            Self::Interface => "interface",
            Self::Type => "type",
            Self::Macro => "macro",
            Self::Other(s) => s.as_str(),
        }
    }
}

impl std::fmt::Display for ScopeKind {
    /// Writes the capture suffix (see [`ScopeKind::as_suffix`]), honouring
    /// any width/alignment flags supplied by the caller.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.as_suffix())
    }
}
100
101/// A reusable per-language detector that runs a tags query over source bytes
102/// and yields [`NamedScope`]s.
103///
104/// Construct once per grammar (the query is compiled inside
105/// [`TagsConfiguration::new`]); reuse across many files. The internal
106/// [`TagsContext`] holds a tree-sitter `Parser` and `QueryCursor` that are
107/// reset between calls.
108pub struct ScopeDetector {
109    config: Option<TagsConfiguration>,
110    ctx: TagsContext,
111}
112
113impl std::fmt::Debug for ScopeDetector {
114    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
115        // `TagsContext` doesn't implement Debug; we only expose presence of
116        // the config. `finish_non_exhaustive` documents the omission to the
117        // reader (and to clippy's missing-fields lint).
118        f.debug_struct("ScopeDetector")
119            .field("has_config", &self.config.is_some())
120            .finish_non_exhaustive()
121    }
122}
123
124impl ScopeDetector {
125    /// Build a detector from a grammar's tags query.
126    ///
127    /// Pass `None` for `base_query` if the grammar does not ship a
128    /// `queries/tags.scm`; the detector is then a no-op (always returns an
129    /// empty scope list). Pass `Some(project_override)` to concatenate an
130    /// override query in front of the base (tree-sitter unions all patterns,
131    /// so override captures augment the grammar defaults).
132    ///
133    /// # Errors
134    ///
135    /// Returns [`ParseError::ScopeQueryCompile`] if the combined query fails
136    /// to compile against the given language (malformed S-expression, unknown
137    /// capture name outside the tags-query vocabulary, etc.).
138    pub fn new(
139        language: &Language,
140        base_query: Option<&str>,
141        project_override: Option<&str>,
142    ) -> Result<Self, ParseError> {
143        let combined = match (base_query, project_override) {
144            (None, None) => {
145                return Ok(Self {
146                    config: None,
147                    ctx: TagsContext::new(),
148                });
149            }
150            (Some(base), None) => base.to_owned(),
151            (None, Some(ov)) => ov.to_owned(),
152            (Some(base), Some(ov)) => format!("{ov}\n{base}"),
153        };
154
155        let config = TagsConfiguration::new(language.clone(), &combined, "").map_err(|e| {
156            ParseError::ScopeQueryCompile {
157                reason: e.to_string(),
158            }
159        })?;
160
161        Ok(Self {
162            config: Some(config),
163            ctx: TagsContext::new(),
164        })
165    }
166
167    /// True when this detector has a compiled tags query and will produce
168    /// scopes. A detector constructed from `(None, None)` is a no-op.
169    #[must_use]
170    pub const fn has_query(&self) -> bool {
171        self.config.is_some()
172    }
173
174    /// Run the tags query over `source` and return every `@definition.*`
175    /// match as a [`NamedScope`].
176    ///
177    /// Non-definition captures (`@reference.*`, etc.) are filtered out: they
178    /// describe call sites, not scopes. `@ignore` matches (used by grammars
179    /// like Elixir to suppress false positives) are handled internally by
180    /// `tree-sitter-tags`.
181    ///
182    /// Ordering mirrors tree-sitter's match order (roughly source order,
183    /// with potential reordering when patterns overlap). Callers that need a
184    /// deterministic byte-ordered index should sort the result.
185    #[must_use]
186    pub fn scopes(&mut self, source: &[u8]) -> Vec<NamedScope> {
187        let Some(config) = self.config.as_ref() else {
188            return Vec::new();
189        };
190
191        let Ok((iter, _had_parse_error)) = self.ctx.generate_tags(config, source, None) else {
192            return Vec::new();
193        };
194
195        let mut scopes = Vec::new();
196        for tag_result in iter {
197            let Ok(tag) = tag_result else { continue };
198            if !tag.is_definition {
199                continue;
200            }
201
202            let syntax = config.syntax_type_name(tag.syntax_type_id);
203            let kind = ScopeKind::from_suffix(syntax);
204
205            let Some(name_bytes) = source.get(tag.name_range.clone()) else {
206                continue;
207            };
208            let Ok(name) = std::str::from_utf8(name_bytes) else {
209                continue;
210            };
211
212            scopes.push(NamedScope {
213                node_range: tag.range,
214                name_range: tag.name_range,
215                name: name.to_owned(),
216                kind,
217            });
218        }
219        scopes
220    }
221}
222
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A detector built with neither a base query nor an override reports
    /// that it has no query and yields no scopes.
    #[test]
    fn no_query_is_empty() {
        // The rust grammar is looked up only to obtain a `Language` value;
        // a detector without a config must never run it. Without the
        // `grammars` feature the body is compiled out and the test passes
        // trivially.
        #[cfg(feature = "grammars")]
        {
            let rust = panproto_grammars::grammars()
                .into_iter()
                .find(|g| g.name == "rust");
            if let Some(rust) = rust {
                let mut detector = ScopeDetector::new(&rust.language, None, None).unwrap();
                assert!(!detector.has_query());
                assert!(detector.scopes(b"fn f() {}").is_empty());
            }
        }
    }

    /// A free function and a struct are both reported by name, and the
    /// function is classified as `Function`.
    #[test]
    #[cfg(feature = "grammars")]
    fn rust_function_item_is_detected() {
        let Some(rust) = panproto_grammars::grammars()
            .into_iter()
            .find(|g| g.name == "rust")
        else {
            return; // rust grammar not enabled in this feature set
        };
        let Some(tags) = rust.tags_query else {
            return; // grammar was fetched without queries/tags.scm
        };
        let mut detector = ScopeDetector::new(&rust.language, Some(tags), None).unwrap();
        assert!(detector.has_query());

        let source = b"fn verify_push(token: &str) -> bool { true }\n\
                       struct Foo { x: u32 }\n";
        let found = detector.scopes(source);
        let names: Vec<&str> = found.iter().map(|scope| scope.name.as_str()).collect();
        assert!(names.contains(&"verify_push"), "got {names:?}");
        assert!(names.contains(&"Foo"), "got {names:?}");

        let verify_push = found
            .iter()
            .find(|scope| scope.name == "verify_push")
            .unwrap();
        assert_eq!(verify_push.kind, ScopeKind::Function);
    }

    /// A function inside an `impl` block is reported, classified as either
    /// `Method` or `Function`.
    #[test]
    #[cfg(feature = "grammars")]
    fn rust_impl_method_is_detected_as_method() {
        let Some(rust) = panproto_grammars::grammars()
            .into_iter()
            .find(|g| g.name == "rust")
        else {
            return;
        };
        let Some(tags) = rust.tags_query else {
            return;
        };
        let mut detector = ScopeDetector::new(&rust.language, Some(tags), None).unwrap();

        let scopes = detector.scopes(b"impl Foo { fn bar(&self) {} }");
        let bar = scopes.iter().find(|scope| scope.name == "bar");
        assert!(bar.is_some(), "expected bar method, got {scopes:?}");
        // Most rust tags.scm versions label impl methods as
        // @definition.method; either Method or Function is accepted to
        // tolerate upstream variation.
        let kind = &bar.unwrap().kind;
        assert!(matches!(kind, ScopeKind::Method | ScopeKind::Function));
    }
}