use std::collections::HashSet;
use std::sync::LazyLock;
use std::time::Instant;
use regex::Regex;
use crate::types::{Config, Token};
use crate::validate::validate_regex;
/// Maximum nesting depth of parenthesised groups accepted by `parse_term`.
const MAX_DEPTH: usize = 32;
/// Maximum number of tokens an expanded expression may contain; `query` returns nothing beyond this.
const MAX_TOKENS: usize = 1024;
/// Maximum number of macro substitution passes performed by `expand_macros`.
const MAX_MACRO_EXPANSIONS: usize = 10;
/// Maximum number of `search_by_regex` calls honoured between counter resets (`query` resets the counter).
const MAX_REGEX_QUERIES: usize = 5;
/// Upper bound on the number of matches collected by a single regex search.
const MAX_SEARCH_RESULTS: usize = 100;
/// Wall-clock budget, in milliseconds, for the link scan of a single regex search.
const REGEX_TIMEOUT_MS: u128 = 20;
/// Maximum number of refiners applied to one segment; further refiners are skipped with a warning.
const MAX_REFINERS: usize = 10;
/// Matches `@` followed by zero or more word characters. Capture group 1 is the macro
/// name; it is empty for a bare `@`.
static MACRO_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"@(\w*)").unwrap());
/// Evaluates link-selection expressions against a borrowed [`Config`].
pub struct ExpressionParser<'a> {
/// Configuration that supplies the links, macros, search patterns and protocols.
config: &'a Config,
/// Current nesting depth of parenthesised groups; compared against `MAX_DEPTH`.
depth: usize,
/// Number of regex searches requested since the last reset; compared against `MAX_REGEX_QUERIES`.
regex_count: usize,
}
impl<'a> ExpressionParser<'a> {
#[must_use]
pub fn new(config: &'a Config) -> Self {
Self {
config,
depth: 0,
regex_count: 0,
}
}
/// Evaluates `expression` and returns the matching link ids, de-duplicated in
/// first-seen order.
///
/// `anchor_id` is the macro name substituted for a bare `@`. An empty vector is
/// returned when the expression is blank, the configuration has no links, macro
/// expansion yields nothing, or the token count is zero or above `MAX_TOKENS`.
#[must_use]
pub fn query(&mut self, expression: &str, anchor_id: &str) -> Vec<String> {
    let trimmed = expression.trim();
    if trimmed.is_empty() || self.config.all_links.is_empty() {
        return Vec::new();
    }

    let source = self.expand_macros(trimmed, anchor_id);
    if source.is_empty() {
        return Vec::new();
    }

    let tokens = tokenize(&source);
    if !(1..=MAX_TOKENS).contains(&tokens.len()) {
        return Vec::new();
    }

    // Every query starts with fresh nesting and regex budgets.
    self.depth = 0;
    self.regex_count = 0;

    let mut cursor = 0;
    let matched = self.parse_query(&tokens, &mut cursor);
    dedupe(&matched)
}
/// Returns the ids of all links that carry a tag equal to `class_name`.
#[must_use]
pub fn search_by_class(&self, class_name: &str) -> Vec<String> {
    // An empty link map simply produces an empty result.
    let mut tagged = Vec::new();
    for (id, link) in &self.config.all_links {
        if link.tags.iter().any(|t| t == class_name) {
            tagged.push(id.clone());
        }
    }
    tagged
}
/// Runs the configured search pattern `pattern_key` over all links and returns the
/// ids of those that match.
///
/// The pattern entry is either a plain string or an object with a `pattern` string
/// and an optional `options` object (`fields`, `limit`, `age`, `sort`).
/// `field_opts`, when non-empty, overrides the configured `fields` codes.
///
/// Returns an empty vector when the per-query regex budget is exhausted, the key is
/// unknown, the entry has an unsupported shape, the pattern is rejected by
/// `validate_regex`, or it fails to compile. The scan stops early once
/// `REGEX_TIMEOUT_MS` has elapsed or `MAX_SEARCH_RESULTS` hits were collected.
#[must_use]
pub fn search_by_regex(&mut self, pattern_key: &str, field_opts: &str) -> Vec<String> {
    // The attempt is counted even if it fails later on.
    self.regex_count += 1;
    if self.regex_count > MAX_REGEX_QUERIES {
        return Vec::new();
    }

    let Some(entry) = self.config.search_patterns.get(pattern_key) else {
        return Vec::new();
    };
    let (pattern_str, opts) = match entry {
        serde_json::Value::String(text) => (text.as_str(), None),
        serde_json::Value::Object(map) => (
            map.get("pattern")
                .and_then(|v| v.as_str())
                .unwrap_or_default(),
            map.get("options").and_then(|v| v.as_object()),
        ),
        _ => return Vec::new(),
    };

    if !validate_regex(pattern_str).safe {
        return Vec::new();
    }
    let Ok(re) = Regex::new(&format!("(?i){pattern_str}")) else {
        return Vec::new();
    };

    // Field codes given inline take precedence over the configured ones; "a" means all fields.
    let configured_codes = opts
        .and_then(|o| o.get("fields"))
        .and_then(|v| v.as_str());
    let field_codes = match (field_opts.is_empty(), configured_codes) {
        (false, _) => field_opts,
        (true, Some(codes)) => codes,
        (true, None) => "a",
    };
    let fields = parse_field_codes(field_codes);

    let limit = match opts
        .and_then(|o| o.get("limit"))
        .and_then(|v| v.as_f64())
    {
        Some(requested) => (requested as usize).min(MAX_SEARCH_RESULTS),
        None => MAX_SEARCH_RESULTS,
    };
    // A max age of 0.0 disables the age filter.
    let max_age = opts
        .and_then(|o| o.get("age"))
        .and_then(|v| v.as_str())
        .map(parse_age)
        .unwrap_or(0.0);

    let now_ms = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_millis() as f64;
    let started = Instant::now();

    // Each hit is (link id, creation timestamp in ms).
    let mut hits: Vec<(String, f64)> = Vec::new();
    for (id, link) in &self.config.all_links {
        if started.elapsed().as_millis() > REGEX_TIMEOUT_MS {
            break;
        }

        let mut known_ts = None;
        if max_age > 0.0 {
            let ts = to_timestamp(link.created_at.as_ref());
            // Links without a usable timestamp, or older than the limit, are skipped.
            if ts == 0.0 || (now_ms - ts) > max_age {
                continue;
            }
            known_ts = Some(ts);
        }

        if !matches_fields(&re, id, link, &fields) {
            continue;
        }
        let created_at =
            known_ts.unwrap_or_else(|| to_timestamp(link.created_at.as_ref()));
        hits.push((id.clone(), created_at));
        if hits.len() >= MAX_SEARCH_RESULTS {
            break;
        }
    }

    match opts
        .and_then(|o| o.get("sort"))
        .and_then(|v| v.as_str())
    {
        Some("alpha") => hits.sort_by(|a, b| a.0.cmp(&b.0)),
        Some("newest") => hits.sort_by(|a, b| b.1.total_cmp(&a.1)),
        Some("oldest") => hits.sort_by(|a, b| a.1.total_cmp(&b.1)),
        _ => {}
    }

    hits.truncate(limit);
    hits.into_iter().map(|(id, _)| id).collect()
}
/// Resolves a protocol expression of the form `name|arg1|arg2…` by calling the
/// protocol's handler once per link and collecting the ids it accepts.
///
/// Logs to stderr and returns an empty vector when the protocol is not registered.
/// A handler panic for one link is caught and logged, and that link is skipped.
fn resolve_protocol(&self, value: &str) -> Vec<String> {
    let mut pieces = value.split('|');
    let protocol_name = pieces.next().unwrap_or_default();
    let args: Vec<String> = pieces.map(str::to_owned).collect();

    // A missing protocol table and a missing entry are reported identically.
    let registered = self
        .config
        .protocols
        .as_ref()
        .and_then(|table| table.get(protocol_name));
    let Some(protocol) = registered else {
        eprintln!("Protocol \"{protocol_name}\" not found in config.protocols");
        return Vec::new();
    };

    let handler = protocol.handler;
    let mut accepted = Vec::new();
    for (id, link) in &self.config.all_links {
        let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            handler(&args, link, id)
        }));
        match outcome {
            Ok(true) => accepted.push(id.clone()),
            Ok(false) => {}
            Err(_) => {
                eprintln!("Protocol \"{protocol_name}\" handler panicked for item \"{id}\" — skipping");
            }
        }
    }
    accepted
}
/// Applies `refiners` in order to the links named by `ids` and returns the resulting ids.
///
/// Each refiner is written `name` or `name:arg`; only the first argument is used.
/// Supported names: `sort` (default field `label`), `reverse`, `limit`, `skip`,
/// `shuffle` (deterministic, seeded from the list length and first id) and
/// `unique` (default field `url`). Unknown names are logged and skipped.
/// Ids that are not present in the configuration are dropped whenever at least
/// one refiner is given.
fn apply_refiners(&self, ids: &[String], refiners: &[Token]) -> Vec<String> {
    if refiners.is_empty() {
        return ids.to_vec();
    }

    let mut working: Vec<(&str, &crate::types::Link)> = Vec::with_capacity(ids.len());
    for id in ids {
        if let Some(link) = self.config.all_links.get(id) {
            working.push((id.as_str(), link));
        }
    }

    for token in refiners {
        let Token::Refiner(spec) = token else {
            continue;
        };
        let mut pieces = spec.split(':');
        let name = pieces.next().unwrap_or_default();
        let first_arg = pieces.next();
        let count_arg = first_arg.and_then(|raw| raw.parse::<usize>().ok());

        match name {
            "sort" => {
                let field = first_arg.unwrap_or("label");
                // Stable sort, so links with equal keys keep their relative order.
                working.sort_by_cached_key(|(id, link)| get_link_field(id, link, field));
            }
            "reverse" => working.reverse(),
            "limit" => {
                // A missing or non-numeric count leaves the list untouched.
                if let Some(keep) = count_arg {
                    working.truncate(keep);
                }
            }
            "skip" => {
                if let Some(skip) = count_arg {
                    let cut = skip.min(working.len());
                    working = working.split_off(cut);
                }
            }
            "shuffle" => {
                // Seed: list length folded with the bytes of the first id.
                let mut state = working.len() as u64;
                if let Some((head_id, _)) = working.first() {
                    state = head_id.bytes().fold(state, |acc, byte| {
                        acc.wrapping_mul(31).wrapping_add(byte as u64)
                    });
                }
                // Back-to-front swap pass driven by a linear congruential generator.
                let mut upper = working.len();
                while upper > 1 {
                    upper -= 1;
                    state = state
                        .wrapping_mul(6364136223846793005)
                        .wrapping_add(1442695040888963407);
                    let pick = (state >> 33) as usize % (upper + 1);
                    working.swap(upper, pick);
                }
            }
            "unique" => {
                let field = first_arg.unwrap_or("url");
                let mut seen = HashSet::new();
                // Keep only the first link for each distinct field value.
                working.retain(|(id, link)| seen.insert(get_link_field(id, link, field)));
            }
            _ => {
                eprintln!("Unknown refiner \"{name}\" — skipping");
            }
        }
    }

    working.into_iter().map(|(id, _)| id.to_string()).collect()
}
fn expand_macros(&self, expr: &str, anchor_id: &str) -> String {
let mut result = expr.to_owned();
for _ in 0..MAX_MACRO_EXPANSIONS {
if !result.contains('@') {
break;
}
let before = result.clone();
result = MACRO_RE
.replace_all(&before, |caps: ®ex::Captures| {
let name = &caps[1];
let name = if name.is_empty() { anchor_id } else { name };
if name.is_empty() {
return String::new();
}
self.config
.macros
.get(name)
.filter(|m| !m.link_items.is_empty())
.map_or(String::new(), |m| m.link_items.clone())
})
.into_owned();
if result == before {
break;
}
}
result
}
/// Parses a comma-separated list of segments starting at `*pos` and concatenates
/// their results in order. A trailing comma is tolerated.
fn parse_query(&mut self, tokens: &[Token], pos: &mut usize) -> Vec<String> {
    let mut collected = self.parse_segment(tokens, pos);
    while matches!(tokens.get(*pos), Some(Token::Comma)) {
        *pos += 1;
        if *pos >= tokens.len() {
            break;
        }
        collected.extend(self.parse_segment(tokens, pos));
    }
    collected
}
/// Parses one segment: a term, any number of `+` / `|` / `-` operations applied
/// left to right, then trailing refiners.
///
/// `+` keeps ids present on both sides, `|` appends right-hand ids not already
/// present on the left, and `-` removes right-hand ids. If the segment starts with
/// an operator (the first term consumed no tokens), the first right-hand operand
/// becomes the initial result. At most `MAX_REFINERS` refiners are applied; extra
/// ones are consumed and ignored with a warning.
fn parse_segment(&mut self, tokens: &[Token], pos: &mut usize) -> Vec<String> {
    if *pos >= tokens.len() {
        return Vec::new();
    }

    let entry = *pos;
    let mut acc = self.parse_term(tokens, pos);
    // Whether `acc` already holds a real left-hand operand.
    let mut seeded = *pos > entry;

    while let Some(op) = tokens.get(*pos) {
        if !matches!(op, Token::Plus | Token::Pipe | Token::Minus) {
            break;
        }
        *pos += 1;
        if *pos >= tokens.len() {
            // Dangling operator at the end of input.
            break;
        }
        let rhs = self.parse_term(tokens, pos);

        if !seeded {
            acc = rhs;
            seeded = true;
            continue;
        }

        match op {
            Token::Plus => {
                let keep: HashSet<&str> = rhs.iter().map(String::as_str).collect();
                acc.retain(|id| keep.contains(id.as_str()));
            }
            Token::Pipe => {
                let present: HashSet<String> = acc.iter().cloned().collect();
                acc.extend(rhs.into_iter().filter(|id| !present.contains(id)));
            }
            Token::Minus => {
                let drop: HashSet<&str> = rhs.iter().map(String::as_str).collect();
                acc.retain(|id| !drop.contains(id.as_str()));
            }
            _ => unreachable!(),
        }
    }

    let mut refiners: Vec<Token> = Vec::new();
    while let Some(candidate) = tokens.get(*pos) {
        if !matches!(candidate, Token::Refiner(_)) {
            break;
        }
        *pos += 1;
        if refiners.len() >= MAX_REFINERS {
            eprintln!("Refiner limit exceeded (max {MAX_REFINERS} per segment). Skipping remaining refiners.");
            continue;
        }
        refiners.push(candidate.clone());
    }

    if refiners.is_empty() {
        acc
    } else {
        self.apply_refiners(&acc, &refiners)
    }
}
/// Parses a term: either a parenthesised segment or a single atom.
///
/// Nesting deeper than `MAX_DEPTH` aborts parsing by moving `*pos` to the end of
/// the token list and returning an empty result. A missing closing parenthesis is
/// tolerated.
fn parse_term(&mut self, tokens: &[Token], pos: &mut usize) -> Vec<String> {
    match tokens.get(*pos) {
        None => Vec::new(),
        Some(Token::LParen) => {
            self.depth += 1;
            if self.depth > MAX_DEPTH {
                *pos = tokens.len();
                return Vec::new();
            }
            *pos += 1;
            let group = self.parse_segment(tokens, pos);
            if matches!(tokens.get(*pos), Some(Token::RParen)) {
                *pos += 1;
            }
            self.depth -= 1;
            group
        }
        Some(_) => self.parse_atom(tokens, pos),
    }
}
/// Evaluates the atom at `*pos` and advances past it.
///
/// Item ids resolve to themselves when present in the configuration, classes to a
/// tag search, regex tokens (`key` or `key|fieldcodes`) to a pattern search, and
/// protocol tokens to a protocol lookup. DOM references are consumed but yield
/// nothing. Any other token is left unconsumed and yields nothing.
fn parse_atom(&mut self, tokens: &[Token], pos: &mut usize) -> Vec<String> {
    let Some(token) = tokens.get(*pos) else {
        return Vec::new();
    };
    match token {
        Token::ItemId(id) => {
            *pos += 1;
            if self.config.all_links.contains_key(id) {
                vec![id.clone()]
            } else {
                Vec::new()
            }
        }
        Token::Class(class_name) => {
            *pos += 1;
            self.search_by_class(class_name)
        }
        Token::Regex(spec) => {
            *pos += 1;
            let (pattern_key, field_opts) =
                spec.split_once('|').unwrap_or((spec.as_str(), ""));
            self.search_by_regex(pattern_key, field_opts)
        }
        Token::Protocol(spec) => {
            *pos += 1;
            self.resolve_protocol(spec)
        }
        Token::DomRef(_) => {
            *pos += 1;
            Vec::new()
        }
        _ => Vec::new(),
    }
}
}
/// Splits an expression into tokens.
///
/// Recognised forms:
/// - `+`, `|`, `-`, `,`, `(`, `)` — operators and grouping;
/// - `.word` — class, `#word` — DOM reference (dropped when the word is empty);
/// - `/key/codes` — regex reference; `codes` is any run of the letters `lutdka`
///   and is appended to the key as `key|codes`;
/// - `:name:arg:…:` — protocol; segments are joined with `|`;
/// - `*content*` — refiner;
/// - a run of word characters — item id.
///
/// Whitespace and any other character are skipped.
fn tokenize(expr: &str) -> Vec<Token> {
    // Characters that end a protocol expression when they directly follow a `:`.
    const PROTOCOL_STOPS: &str = " \t\n\r+|,()*/";
    // Field-selector letters accepted after a regex's closing `/`.
    const FIELD_CODES: &str = "lutdka";

    // Collects characters from `*at` up to (not including) the next `stop`,
    // leaving `*at` on the stop character or at the end of input.
    fn take_until(chars: &[char], at: &mut usize, stop: char) -> String {
        let mut taken = String::new();
        while let Some(&c) = chars.get(*at) {
            if c == stop {
                break;
            }
            taken.push(c);
            *at += 1;
        }
        taken
    }

    let chars: Vec<char> = expr.chars().collect();
    let total = chars.len();
    let mut out = Vec::new();
    let mut cur = 0;

    while cur < total {
        let ch = chars[cur];
        match ch {
            c if c.is_whitespace() => cur += 1,
            '+' | '|' | '-' | ',' | '(' | ')' => {
                out.push(match ch {
                    '+' => Token::Plus,
                    '|' => Token::Pipe,
                    '-' => Token::Minus,
                    ',' => Token::Comma,
                    '(' => Token::LParen,
                    _ => Token::RParen,
                });
                cur += 1;
            }
            '.' | '#' => {
                cur += 1;
                let word = read_word(&chars, &mut cur);
                if !word.is_empty() {
                    out.push(if ch == '.' {
                        Token::Class(word)
                    } else {
                        Token::DomRef(word)
                    });
                }
            }
            '/' => {
                cur += 1;
                let key = take_until(&chars, &mut cur, '/');
                let mut opts = String::new();
                if chars.get(cur) == Some(&'/') {
                    cur += 1;
                    while cur < total && FIELD_CODES.contains(chars[cur]) {
                        opts.push(chars[cur]);
                        cur += 1;
                    }
                }
                if !key.is_empty() {
                    out.push(Token::Regex(if opts.is_empty() {
                        key
                    } else {
                        format!("{key}|{opts}")
                    }));
                }
            }
            ':' => {
                cur += 1;
                let mut segments = take_until(&chars, &mut cur, ':');
                while chars.get(cur) == Some(&':') {
                    cur += 1;
                    // A colon followed by end of input or a stop character closes the protocol.
                    match chars.get(cur) {
                        None => break,
                        Some(&next) if PROTOCOL_STOPS.contains(next) => break,
                        Some(_) => {}
                    }
                    segments.push('|');
                    segments.push_str(&take_until(&chars, &mut cur, ':'));
                }
                if !segments.is_empty() {
                    out.push(Token::Protocol(segments));
                }
            }
            '*' => {
                cur += 1;
                let content = take_until(&chars, &mut cur, '*');
                if chars.get(cur) == Some(&'*') {
                    cur += 1;
                }
                if !content.is_empty() {
                    out.push(Token::Refiner(content));
                }
            }
            c if is_word_char(c) => out.push(Token::ItemId(read_word(&chars, &mut cur))),
            _ => cur += 1,
        }
    }
    out
}
/// Returns `true` for characters allowed in identifiers: an underscore or any
/// Unicode alphanumeric character.
fn is_word_char(ch: char) -> bool {
    matches!(ch, '_') || ch.is_alphanumeric()
}
/// Reads the run of word characters starting at `chars[*i]`, advances `*i` past it
/// and returns the run. Returns an empty string (leaving `*i` unchanged) when `*i`
/// is out of range or the character there is not a word character.
fn read_word(chars: &[char], i: &mut usize) -> String {
    let remaining = chars.get(*i..).unwrap_or_default();
    let run_len = remaining
        .iter()
        .take_while(|&&c| is_word_char(c))
        .count();
    *i += run_len;
    remaining[..run_len].iter().collect()
}
/// Translates single-letter field codes into field names.
///
/// `l` → `label`, `u` → `url`, `t` → `tags`, `d` → `description`, `k` → `id`,
/// `a` → all five. Unrecognised letters are ignored; if nothing was selected,
/// all five fields are returned.
fn parse_field_codes(codes: &str) -> HashSet<&'static str> {
    const ALL_FIELDS: [&str; 5] = ["label", "url", "tags", "description", "id"];

    let mut selected: HashSet<&'static str> = HashSet::new();
    for code in codes.chars() {
        let single = match code {
            'l' => "label",
            'u' => "url",
            't' => "tags",
            'd' => "description",
            'k' => "id",
            'a' => {
                selected.extend(ALL_FIELDS);
                continue;
            }
            _ => continue,
        };
        selected.insert(single);
    }

    if selected.is_empty() {
        selected.extend(ALL_FIELDS);
    }
    selected
}
/// Returns `true` when `re` matches at least one of the selected `fields` of the link.
///
/// Fields are checked in the order id, label, url, description, tags. Empty or
/// absent label, url and description values never match; the id and individual
/// tags are tested as they are.
fn matches_fields(
    re: &Regex,
    id: &str,
    link: &crate::types::Link,
    fields: &HashSet<&str>,
) -> bool {
    let wanted = |name: &str| fields.contains(name);
    let non_empty_match = |text: &str| !text.is_empty() && re.is_match(text);

    (wanted("id") && re.is_match(id))
        || (wanted("label") && link.label.as_deref().is_some_and(non_empty_match))
        || (wanted("url") && non_empty_match(&link.url))
        || (wanted("description")
            && link.description.as_deref().is_some_and(non_empty_match))
        || (wanted("tags") && link.tags.iter().any(|tag| re.is_match(tag)))
}
/// Matches an age specification such as `7d` or `12 h`: one or more digits, optional
/// whitespace, then a single unit letter out of `d`, `h`, `w`, `m` (case-insensitive).
/// Capture group 1 is the number, group 2 the unit.
static AGE_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?i)^(\d+)\s*([dhwm])$").unwrap());
/// Converts an age specification (see `AGE_RE`) into milliseconds.
///
/// Units: `h` = hour, `d` = 24 hours, `w` = 7 days, `m` = 30 days.
/// Returns `0.0` when the text does not have the expected shape.
fn parse_age(age: &str) -> f64 {
    let Some(caps) = AGE_RE.captures(age) else {
        return 0.0;
    };
    let Ok(count) = caps[1].parse::<f64>() else {
        return 0.0;
    };
    let unit = caps[2].to_ascii_lowercase();
    let unit_ms = match unit.as_str() {
        "h" => 3_600_000.0,
        "d" => 86_400_000.0,
        "w" => 604_800_000.0,
        "m" => 2_592_000_000.0,
        _ => return 0.0,
    };
    count * unit_ms
}
/// Converts an optional JSON value into a millisecond timestamp.
///
/// Numbers are used as they are, strings are parsed with `parse_rfc3339_ms`, and
/// everything else (absent, null, bool, array, object) yields `0.0`.
fn to_timestamp(value: Option<&serde_json::Value>) -> f64 {
    let Some(json) = value else {
        return 0.0;
    };
    if let Some(text) = json.as_str() {
        return parse_rfc3339_ms(text);
    }
    match json {
        serde_json::Value::Number(number) => number.as_f64().unwrap_or(0.0),
        _ => 0.0,
    }
}
/// Parses an RFC 3339-style timestamp (`YYYY-MM-DDTHH:MM:SS[.fraction][Z|±HH:MM]`)
/// into milliseconds since the Unix epoch.
///
/// Fields are read at fixed byte positions. A timestamp without a zone designator,
/// or with an unrecognised one, is treated as UTC. Fractional seconds are truncated
/// to millisecond precision. An hour, minute or second field that is not a number
/// counts as 0.
///
/// Returns `0.0` when the input is shorter than 19 bytes, when the year is 0 or
/// unreadable, or when the month is outside 1–12 or the day outside 1–31. The day
/// is not checked against the length of the month. Never panics, including on
/// non-ASCII input.
fn parse_rfc3339_ms(s: &str) -> f64 {
    // Reads a numeric field by byte range. Yields 0 when the range is out of bounds,
    // would split a multi-byte character, or does not hold a number.
    fn field(text: &str, range: std::ops::Range<usize>) -> i64 {
        text.get(range)
            .and_then(|digits| digits.parse::<i64>().ok())
            .unwrap_or(0)
    }

    let s = s.trim();
    if s.len() < 19 {
        return 0.0;
    }

    let year = field(s, 0..4);
    let month = field(s, 5..7);
    let day = field(s, 8..10);
    let hour = field(s, 11..13);
    let min = field(s, 14..16);
    let sec = field(s, 17..19);
    if year == 0 || !(1..=12).contains(&month) || !(1..=31).contains(&day) {
        return 0.0;
    }

    let days = days_from_epoch(year, month, day);
    let secs = days * 86400 + hour * 3600 + min * 60 + sec;

    // Whatever follows the seconds: optional fraction, then optional zone.
    let rest = s.get(19..).unwrap_or("");
    let (frac_ms, tz) = match rest.strip_prefix('.') {
        Some(after_dot) => {
            let digit_count = after_dot
                .bytes()
                .take_while(u8::is_ascii_digit)
                .count();
            let (digits, tail) = after_dot.split_at(digit_count);
            // First three fraction digits, right-padded with zeros: ".5" is 500 ms.
            let ms = digits
                .bytes()
                .chain(std::iter::repeat(b'0'))
                .take(3)
                .fold(0i64, |acc, b| acc * 10 + i64::from(b - b'0'));
            (ms, tail)
        }
        None => (0, rest),
    };

    let has_offset = (tz.starts_with('+') || tz.starts_with('-')) && tz.len() >= 6;
    let tz_offset_secs = if has_offset {
        let sign: i64 = if tz.starts_with('-') { -1 } else { 1 };
        sign * (field(tz, 1..3) * 3600 + field(tz, 4..6) * 60)
    } else {
        // `Z`, `z`, nothing, or something unrecognised: treat as UTC.
        0
    };

    ((secs - tz_offset_secs) * 1000 + frac_ms) as f64
}

/// Returns the number of days between 1970-01-01 and the given proleptic Gregorian
/// date (negative for earlier dates). `month` is expected in 1–12 and `day` in 1–31.
fn days_from_epoch(year: i64, month: i64, day: i64) -> i64 {
    // Shift the year to start in March so the leap day falls at the end of the year.
    let y = if month <= 2 { year - 1 } else { year };
    let era = y.div_euclid(400);
    let yoe = y.rem_euclid(400);
    // Month index counted from March (March = 0 … February = 11).
    let m = if month > 2 { month - 3 } else { month + 9 };
    let doy = (153 * m + 2) / 5 + day - 1;
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    // 146097 days per 400-year era; 719468 days from 0000-03-01 to 1970-01-01.
    era * 146097 + doe - 719468
}
/// Returns the named field of a link as an owned string.
///
/// Supported names are `id`, `label`, `url` and `description`; an absent label or
/// description, and any other field name, yield an empty string.
fn get_link_field(id: &str, link: &crate::types::Link, field: &str) -> String {
    let text: &str = match field {
        "id" => id,
        "label" => link.label.as_deref().unwrap_or_default(),
        "url" => &link.url,
        "description" => link.description.as_deref().unwrap_or_default(),
        _ => "",
    };
    text.to_owned()
}
/// Returns `ids` with duplicates removed, keeping the first occurrence of each id
/// and the original order.
fn dedupe(ids: &[String]) -> Vec<String> {
    let mut seen: HashSet<&str> = HashSet::with_capacity(ids.len());
    ids.iter()
        .filter(|id| seen.insert(id.as_str()))
        .cloned()
        .collect()
}