use std::borrow::Cow;
use log::{debug, error, info, trace};
use tree_sitter::{
Language as TSLanguage, Parser as TSParser, Query as TSQuery, QueryCursor as TSQueryCursor,
QueryError as TSQueryError, StreamingIterator,
};
use super::Scoper;
use super::scope::RangesWithContext;
use crate::find::Find;
use crate::ranges::Ranges;
#[cfg(doc)]
use crate::scoping::{
scope::Scope::{In, Out},
view::ScopedViewBuilder,
};
pub mod c;
pub mod csharp;
pub mod go;
pub mod hcl;
pub mod python;
pub mod rust;
pub mod typescript;
/// Newtype around a compiled, byte-oriented [`regex::bytes::Regex`].
///
/// Its [`Display`](std::fmt::Display) output is *not* the raw pattern: the pattern's
/// source text is passed through [`regex::escape`] first.
#[derive(Debug, Clone)]
pub struct TreeSitterRegex(pub regex::bytes::Regex);

impl std::fmt::Display for TreeSitterRegex {
    /// Writes the source of the wrapped pattern, escaped via [`regex::escape`].
    ///
    /// NOTE(review): presumably the escaping exists so the pattern survives being embedded
    /// in a tree-sitter query string literal — confirm against the call sites. Note that
    /// `regex::escape` targets regex meta characters; whether that covers everything the
    /// embedding needs (e.g. double quotes) should be verified.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let pattern = self.0.as_str();
        f.write_str(&regex::escape(pattern))
    }
}
#[derive(Debug)]
struct CompiledQuery {
positive_query: TSQuery,
negative_query: Option<TSQuery>,
}
impl CompiledQuery {
fn from_source(lang: &TSLanguage, query: &QuerySource) -> Result<Self, TSQueryError> {
Self::from_str(lang, &query.0)
}
fn from_prepared_query(lang: &TSLanguage, query: &str) -> Self {
Self::from_str(lang, query).unwrap_or_else(|qe| {
error!("Failed to compile prepared query: {qe:?}");
panic!("syntax of prepared queries should be validated by tests, and injection of regex be protected by {}", stringify!(TreeSitterRegex))
})
}
fn from_str(lang: &TSLanguage, query: &str) -> Result<Self, TSQueryError> {
let positive_query = TSQuery::new(lang, query)?;
let is_ignored = |name: &str| name.starts_with(IGNORE);
let has_ignored_captures = positive_query
.capture_names()
.iter()
.any(|name| is_ignored(name));
let negative_query = has_ignored_captures
.then(|| {
let mut query = TSQuery::new(lang, query)?;
let acknowledged_captures = query
.capture_names()
.iter()
.filter(|name| !is_ignored(name))
.map(|s| String::from(*s))
.collect::<Vec<_>>();
for name in acknowledged_captures {
trace!("Disabling capture for: {name:?}");
query.disable_capture(&name);
}
Ok(query)
})
.transpose()?;
Ok(Self {
positive_query,
negative_query,
})
}
}
/// Text of a query, held either as a `'static` borrow or as an owned string.
#[derive(Clone, Debug)]
pub struct QuerySource(Cow<'static, str>);

impl From<String> for QuerySource {
    /// Takes ownership of `s` and stores it as the owned variant; the text is not copied.
    fn from(s: String) -> Self {
        Self(Cow::Owned(s))
    }
}
/// Capture-name prefix checked by `CompiledQuery::from_str`.
///
/// A query containing at least one capture whose name starts with this prefix gets an
/// additional "negative" query compiled from the same source, in which all captures
/// *without* the prefix are disabled.
///
/// NOTE(review): presumably that negative query is what language scopers return from
/// `LanguageScoper::neg_query`, whose ranges are subtracted in `scope_via_query` — confirm
/// in the individual language modules.
pub(super) const IGNORE: &str = "_SRGN_IGNORE";
/// A [`Scoper`] that works by parsing input with tree-sitter and running queries on it.
///
/// Implementors supply the grammar ([`lang`](Self::lang)) and the compiled queries
/// ([`pos_query`](Self::pos_query), [`neg_query`](Self::neg_query)). Parser construction
/// and query execution come from the provided methods.
pub trait LanguageScoper: Scoper + Find + Send + Sync {
    /// The tree-sitter language (grammar) handed to the parser.
    fn lang() -> TSLanguage
    where
        Self: Sized;

    /// The query whose captures form the resulting ranges.
    fn pos_query(&self) -> &TSQuery
    where
        Self: Sized;

    /// An optional query whose captured ranges are subtracted from those of
    /// [`pos_query`](Self::pos_query).
    fn neg_query(&self) -> Option<&TSQuery>
    where
        Self: Sized;

    /// Builds a new parser configured with [`lang`](Self::lang).
    ///
    /// # Panics
    ///
    /// Panics if the parser refuses the language.
    #[must_use]
    fn parser() -> TSParser
    where
        Self: Sized,
    {
        let mut configured = TSParser::new();
        let language = Self::lang();
        configured
            .set_language(&language)
            .expect("Should be able to load language grammar and parser");
        configured
    }

    /// Parses `input` and returns the byte ranges selected by this scoper's queries.
    ///
    /// The ranges captured by the positive query are collected and merged; if a negative
    /// query exists, its (likewise merged) ranges are subtracted from them.
    ///
    /// # Panics
    ///
    /// Panics if the parser cannot be set up or parsing yields no tree.
    fn scope_via_query(&self, input: &str) -> Ranges<usize>
    where
        Self: Sized,
    {
        trace!("Parsing into AST: {input:?}");

        // No previous tree is supplied to the parser; each call starts from `input` alone.
        let tree = Self::parser()
            .parse(input, None)
            .expect("No language set in parser, or other unrecoverable error");
        let root = tree.root_node();
        debug!(
            "S expression of parsed source code is: {:?}",
            root.to_sexp()
        );

        // Runs one query against the parsed tree and returns the merged byte ranges of
        // everything it captured.
        let run = |query: &TSQuery| {
            trace!("Running query: {query:?}");

            let mut cursor = TSQueryCursor::new();
            let mut matches = cursor.matches(query, root, input.as_bytes());

            // The matches are drained manually via `next` (streaming iterator), collecting
            // the byte range of every captured node.
            let mut byte_ranges = Vec::with_capacity(matches.size_hint().1.unwrap_or_default());
            while let Some(found) = matches.next() {
                byte_ranges.extend(
                    found
                        .captures
                        .iter()
                        .map(|capture| capture.node.byte_range()),
                );
            }

            let mut ranges: Ranges<usize> = byte_ranges.into_iter().collect();
            ranges.merge();
            trace!("Querying yielded ranges: {ranges:?}");
            ranges
        };

        let ranges = run(self.pos_query());
        if let Some(nq) = self.neg_query() {
            ranges - run(nq)
        } else {
            ranges
        }
    }
}
/// Every sized [`LanguageScoper`] is a [`Scoper`]: its raw scope is the result of
/// [`LanguageScoper::scope_via_query`], converted into [`RangesWithContext`].
impl<T: LanguageScoper> Scoper for T {
    fn scope_raw<'viewee>(&self, input: &'viewee str) -> RangesWithContext<'viewee> {
        let ranges = self.scope_via_query(input);
        ranges.into()
    }
}
/// Lets a boxed, type-erased [`LanguageScoper`] be used as a [`Scoper`] by forwarding to
/// the scoper inside the box.
impl Scoper for Box<dyn LanguageScoper> {
    fn scope_raw<'viewee>(&self, input: &'viewee str) -> RangesWithContext<'viewee> {
        // Dereference down to the trait object so the call dispatches to the boxed scoper
        // rather than to this impl again.
        (**self).scope_raw(input)
    }
}
/// Applies several language scopers to the same input and unites their results.
impl Scoper for &[Box<dyn LanguageScoper>] {
    /// Returns the merged ranges produced by all scopers in the slice, each without
    /// context.
    ///
    /// An empty slice short-circuits to a single range spanning the entire input.
    ///
    /// # Panics
    ///
    /// Panics if any scoper yields a range that already carries a context.
    fn scope_raw<'viewee>(&self, input: &'viewee str) -> RangesWithContext<'viewee> {
        trace!("Scoping many scopes: {input:?}");

        if self.is_empty() {
            trace!("Short-circuiting: self is empty, nothing to scope.");
            return std::iter::once((0..input.len(), None)).collect();
        }

        // Gather the bare ranges of every scoper, insisting that none carries context.
        let mut collected = Vec::new();
        for scoper in self.iter() {
            for (range, ctx) in scoper.scope_raw(input) {
                assert!(
                    ctx.is_none(),
                    "When language scoping runs, no contexts exist yet."
                );
                collected.push(range);
            }
        }

        let mut ranges: Ranges<usize> = collected.into_iter().collect();
        ranges.merge();
        info!("New ranges after scoping many: {ranges:?}");

        ranges.into_iter().map(|r| (r, None)).collect()
    }
}