pub mod regex_decompose;
use crate::tokenizer::build_covering;
/// Boolean query tree over gram values.
///
/// [`GramQuery::simplify`] treats `All` as the neutral element of `And` and
/// as the absorbing element of `Or`; an empty `And` collapses to `All` and an
/// empty `Or` collapses to `None`.
#[derive(Debug, Clone)]
pub enum GramQuery {
    /// Conjunction of sub-queries.
    And(Vec<GramQuery>),
    /// Disjunction of sub-queries.
    Or(Vec<GramQuery>),
    /// A leaf holding a list of gram values.
    Grams(Vec<u64>),
    /// Leaf that `simplify` drops from `And` and that turns any `Or` into `All`.
    All,
    /// Leaf produced by `simplify` for an `Or` without children.
    None,
}

impl GramQuery {
    /// Recursively normalises the query tree, consuming `self`.
    ///
    /// * `And`: every child is simplified and `All` children are discarded.
    ///   No survivors yields `All`, a single survivor is returned unwrapped,
    ///   otherwise the survivors are re-wrapped in `And`.
    /// * `Or`: every child is simplified. If any of them is `All` the whole
    ///   node becomes `All`. Otherwise no children yields `None`, a single
    ///   child is returned unwrapped, and several are re-wrapped in `Or`.
    /// * Leaves (`Grams`, `All`, `None`) are returned unchanged.
    pub fn simplify(self) -> Self {
        match self {
            Self::And(parts) => {
                let mut kept = Vec::with_capacity(parts.len());
                for part in parts {
                    let reduced = part.simplify();
                    if let Self::All = reduced {
                        // `All` adds no constraint to a conjunction.
                        continue;
                    }
                    kept.push(reduced);
                }
                if kept.len() > 1 {
                    Self::And(kept)
                } else {
                    // Zero survivors -> `All`; exactly one -> that survivor.
                    kept.pop().unwrap_or(Self::All)
                }
            }
            Self::Or(parts) => {
                let mut alternatives = Vec::with_capacity(parts.len());
                let mut absorbed = false;
                for part in parts {
                    let reduced = part.simplify();
                    absorbed |= matches!(reduced, Self::All);
                    alternatives.push(reduced);
                }
                if absorbed {
                    return Self::All;
                }
                if alternatives.len() > 1 {
                    Self::Or(alternatives)
                } else {
                    // Zero alternatives -> `None`; exactly one -> that alternative.
                    alternatives.pop().unwrap_or(Self::None)
                }
            }
            leaf => leaf,
        }
    }
}
/// Execution strategy selected by `route_query` for a search pattern.
///
/// How each route is carried out is decided by consumers outside this file.
#[derive(Debug, Clone)]
pub enum QueryRoute {
/// Chosen for a case-sensitive pattern that `is_literal` accepts.
Literal,
/// Carries the gram query to run: either the covering grams of a
/// case-insensitive literal or the simplified regex decomposition.
IndexedRegex(GramQuery),
/// Chosen when no covering grams are available or the decomposed query
/// simplifies to `GramQuery::All` / `GramQuery::None`.
FullScan,
/// Chosen for patterns prefixed with `sym:`, `ref:` or `def:`.
SymbolSearch {
/// Pattern text that follows the prefix.
name: String,
/// `Some("function")` for `def:` patterns, `None` for `sym:` / `ref:`.
kind_filter: Option<String>,
},
}
/// Picks the [`QueryRoute`] for `pattern`.
///
/// Resolution order:
/// 1. A `sym:`, `ref:` or `def:` prefix yields `SymbolSearch` with the rest of
///    the pattern as `name`; only `def:` sets `kind_filter` (to `"function"`).
/// 2. A pattern accepted by [`is_literal`] yields `Literal` when the search is
///    case-sensitive. When it is case-insensitive, the result is
///    `IndexedRegex(Grams(..))` if `build_covering` returns grams and
///    `FullScan` otherwise.
/// 3. Anything else goes through `regex_decompose::decompose` followed by
///    `GramQuery::simplify`; a result of `All` or `None` becomes `FullScan`,
///    any other query is wrapped in `IndexedRegex`.
///
/// # Errors
///
/// Propagates the error returned by `regex_decompose::decompose`.
pub fn route_query(pattern: &str, case_insensitive: bool) -> Result<QueryRoute, String> {
    // Prefix -> kind filter. A pattern can start with at most one of these,
    // so the lookup order does not affect the outcome.
    const SYMBOL_PREFIXES: [(&str, Option<&str>); 3] = [
        ("sym:", None),
        ("ref:", None),
        ("def:", Some("function")),
    ];
    for &(prefix, kind) in SYMBOL_PREFIXES.iter() {
        if let Some(name) = pattern.strip_prefix(prefix) {
            return Ok(QueryRoute::SymbolSearch {
                name: name.to_string(),
                kind_filter: kind.map(|k| k.to_string()),
            });
        }
    }

    if is_literal(pattern) {
        if !case_insensitive {
            return Ok(QueryRoute::Literal);
        }
        let route = build_covering(pattern.as_bytes())
            .map_or(QueryRoute::FullScan, |grams| {
                QueryRoute::IndexedRegex(GramQuery::Grams(grams))
            });
        return Ok(route);
    }

    let decomposed = regex_decompose::decompose(pattern, case_insensitive)?.simplify();
    match decomposed {
        // These two outcomes are routed to a full scan rather than the index.
        GramQuery::All | GramQuery::None => Ok(QueryRoute::FullScan),
        indexed => Ok(QueryRoute::IndexedRegex(indexed)),
    }
}
/// Returns `true` when `pattern` contains none of the characters
/// `. * + ? [ ] { } ( ) | ^ $ \`.
///
/// The empty string is therefore reported as literal.
pub fn is_literal(pattern: &str) -> bool {
    const REGEX_META: &[char] = &[
        '.', '*', '+', '?', '[', ']', '{', '}', '(', ')', '|', '^', '$', '\\',
    ];
    !pattern.contains(REGEX_META)
}
/// Returns whatever `build_covering` yields for the UTF-8 bytes of `pattern`.
pub fn literal_grams(pattern: &str) -> Option<Vec<u64>> {
    let bytes = pattern.as_bytes();
    build_covering(bytes)
}