use crate::lang::detect_language;
use crate::parser::get_parser;
use crate::queries::get_queries;
use crate::tag::{Tag, TagKind};
use std::path::Path;
use tree_sitter::{Query, QueryCursor, StreamingIterator};
/// Extracts definition and reference tags from the source text of one file.
///
/// `fname` is used for language detection and is stored (lossily converted
/// to a string) on every tag alongside `rel_fname`. `content` is the text
/// that gets parsed.
///
/// An empty vector is returned when `content` is empty, when no language is
/// detected for `fname`, when a parser, query set or tree-sitter language is
/// unavailable for that language, when parsing yields no tree, or when the
/// tags query fails to compile.
///
/// Captures whose name starts with `name.definition.` become [`TagKind::Def`]
/// tags and those starting with `name.reference.` become [`TagKind::Ref`]
/// tags; every other capture is skipped. Line numbers are 1-based.
///
/// If at least one definition but no reference was captured and the query
/// set provides an identifiers query, reference tags produced by that query
/// are appended as well (those carry the line value `-1`).
pub fn extract_tags(fname: &Path, rel_fname: &str, content: &str) -> Vec<Tag> {
    if content.is_empty() {
        return Vec::new();
    }

    // Every missing prerequisite is treated the same way: no tags.
    let Some(lang) = detect_language(fname) else {
        return Vec::new();
    };
    let Some(mut parser) = get_parser(lang) else {
        return Vec::new();
    };
    let Some(query_pair) = get_queries(lang) else {
        return Vec::new();
    };
    let Some(tree) = parser.parse(content, None) else {
        return Vec::new();
    };
    let Some(ts_language) = crate::parser::get_language(lang) else {
        return Vec::new();
    };
    let Ok(tags_query) = Query::new(&ts_language, query_pair.tags) else {
        return Vec::new();
    };

    let source_bytes = content.as_bytes();
    let root_node = tree.root_node();
    // Convert the path once; each tag receives its own owned copy below.
    let abs_fname = fname.to_string_lossy();
    let capture_names = tags_query.capture_names();

    let mut tags = Vec::new();
    let (mut saw_def, mut saw_ref) = (false, false);

    let mut cursor = QueryCursor::new();
    let mut matches = cursor.matches(&tags_query, root_node, source_bytes);
    while let Some(query_match) = matches.next() {
        for capture in query_match.captures {
            let node = capture.node;
            // Captures whose text cannot be read as UTF-8 are ignored entirely.
            let Ok(text) = node.utf8_text(source_bytes) else {
                continue;
            };

            let capture_name = &capture_names[capture.index as usize];
            let is_def = capture_name.starts_with("name.definition.");
            let is_ref = !is_def && capture_name.starts_with("name.reference.");
            if !(is_def || is_ref) {
                continue;
            }
            saw_def |= is_def;
            saw_ref |= is_ref;

            let kind = if is_def { TagKind::Def } else { TagKind::Ref };
            // Tree-sitter rows are 0-based; tags use 1-based line numbers.
            let line = (node.start_position().row + 1) as i32;
            tags.push(Tag::new(
                rel_fname.to_string(),
                abs_fname.to_string(),
                line,
                text.to_string(),
                kind,
            ));
        }
    }

    // Definitions without any references: fall back to the identifiers
    // query, when the language provides one, to obtain reference tags.
    if saw_def
        && !saw_ref
        && let Some(idents_query_src) = query_pair.idents
    {
        tags.extend(extract_idents_fallback(
            &ts_language,
            idents_query_src,
            &root_node,
            source_bytes,
            rel_fname,
            &abs_fname,
        ));
    }

    tags
}
/// Collects reference tags by running an identifiers query over a parsed tree.
///
/// `query_src` is compiled against `language` and matched starting at
/// `root_node`, with `source_bytes` as the text backing the tree. Only
/// captures whose name starts with `name.reference.` are kept, and captures
/// whose text is not valid UTF-8 are skipped.
///
/// Every returned tag is a [`TagKind::Ref`] carrying `rel_fname`, `fname`
/// and the line value `-1`. An empty vector is returned when the query does
/// not compile.
fn extract_idents_fallback(
    language: &tree_sitter::Language,
    query_src: &str,
    root_node: &tree_sitter::Node,
    source_bytes: &[u8],
    rel_fname: &str,
    fname: &str,
) -> Vec<Tag> {
    let Ok(idents_query) = Query::new(language, query_src) else {
        return Vec::new();
    };
    let names = idents_query.capture_names();

    let mut refs = Vec::new();
    let mut cursor = QueryCursor::new();
    let mut matches = cursor.matches(&idents_query, *root_node, source_bytes);
    while let Some(query_match) = matches.next() {
        let found = query_match
            .captures
            .iter()
            .filter(|capture| names[capture.index as usize].starts_with("name.reference."))
            .filter_map(|capture| capture.node.utf8_text(source_bytes).ok())
            .map(|text| {
                Tag::new(
                    rel_fname.to_string(),
                    fname.to_string(),
                    -1,
                    text.to_string(),
                    TagKind::Ref,
                )
            });
        refs.extend(found);
    }
    refs
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Rust source yields definitions for functions and types, plus a
    /// reference for the invoked macro.
    #[test]
    fn extract_rust_tags() {
        let source = r#"
fn main() {
println!("hello");
}
struct Foo {
bar: i32,
}
impl Foo {
fn new() -> Self {
Self { bar: 0 }
}
}
"#;
        let found = extract_tags(Path::new("test.rs"), "test.rs", source);

        assert!(found.iter().any(|t| t.is_def() && t.name == "main"));
        assert!(found.iter().any(|t| t.is_def() && t.name == "Foo"));
        assert!(found.iter().any(|t| t.is_def() && t.name == "new"));
        assert!(found.iter().any(|t| t.is_ref() && t.name == "println"));
    }

    /// Python source yields class, method and function definitions, plus a
    /// reference for the called helper.
    #[test]
    fn extract_python_tags() {
        let source = r#"
class MyClass:
def method(self):
pass
def my_function():
helper()
CONSTANT = 42
"#;
        let found = extract_tags(Path::new("test.py"), "test.py", source);

        assert!(found.iter().any(|t| t.is_def() && t.name == "MyClass"));
        assert!(found.iter().any(|t| t.is_def() && t.name == "method"));
        assert!(found.iter().any(|t| t.is_def() && t.name == "my_function"));
        assert!(found.iter().any(|t| t.is_ref() && t.name == "helper"));
    }

    /// TypeScript references are expected to come from the identifiers
    /// fallback, so every one of them carries the line value `-1`.
    #[test]
    fn extract_typescript_fallback() {
        let source = r#"
class MyClass {
constructor() {}
doStuff() {
helper();
}
}
"#;
        let found = extract_tags(Path::new("test.ts"), "test.ts", source);

        assert!(found.iter().any(|t| t.is_def() && t.name == "MyClass"));

        let mut references = found.iter().filter(|t| t.is_ref()).peekable();
        assert!(references.peek().is_some());
        assert!(references.all(|t| t.line == -1));
    }

    /// A file whose language is not recognised produces no tags.
    #[test]
    fn extract_unknown_language() {
        assert!(extract_tags(Path::new("data.csv"), "data.csv", "a,b,c\n1,2,3").is_empty());
    }

    /// Empty content produces no tags, even for a known extension.
    #[test]
    fn extract_empty_file() {
        assert!(extract_tags(Path::new("empty.rs"), "empty.rs", "").is_empty());
    }

    /// An item on the first line of the file is reported at line 1.
    #[test]
    fn line_numbers_are_1_indexed() {
        let found = extract_tags(Path::new("test.rs"), "test.rs", "fn foo() {}\n");
        let foo_line = found.iter().find(|t| t.name == "foo").map(|t| t.line);
        assert_eq!(foo_line, Some(1));
    }
}