use crate::matcher::rule_loader::{RuleSet, ZoneScope};
use crate::matcher::span::{MatchSpan, Property};
use crate::tokenizer;
use crate::zone_map::{self, ZoneMap};
/// Borrowed inputs for a single call to `match_tokens_in_segment`.
pub(crate) struct MatchContext<'a> {
/// Text that token `start`/`end` offsets index into; multi-token windows are sliced from it.
pub input: &'a str,
/// Tokens scanned for matches, in order.
pub tokens: &'a [tokenizer::Token],
/// Rules consulted through `match_token`, plus the `zone_scope` that gates title-zone matches.
pub rule_set: &'a RuleSet,
/// Property assigned to every primary span produced.
pub property: Property,
/// Priority attached to every span produced (primary and side-effect spans alike).
pub priority: i32,
/// Zone information used for the title-zone filter when `dir_zone` is `None`; its
/// `has_anchors` flag is also consulted for rules that set `requires_context`.
pub zone_map: &'a ZoneMap,
/// When `Some`, supplies `has_anchors` and `title_zone` for the title-zone filter
/// in place of `zone_map`.
pub dir_zone: Option<&'a zone_map::SegmentZone>,
}
pub(crate) fn match_tokens_in_segment(ctx: &MatchContext, matches: &mut Vec<MatchSpan>) {
let mut matched_ranges: Vec<(usize, usize)> = Vec::new();
for window_size in (1..=3).rev() {
for i in 0..ctx.tokens.len() {
if i + window_size > ctx.tokens.len() {
break;
}
let win_start = ctx.tokens[i].start;
let win_end = ctx.tokens[i + window_size - 1].end;
let (effective_has_anchors, effective_title_zone) = if let Some(dz) = ctx.dir_zone {
(dz.has_anchors, &dz.title_zone)
} else {
(ctx.zone_map.has_anchors, &ctx.zone_map.title_zone)
};
if effective_has_anchors {
let in_title_zone = effective_title_zone.contains(&win_start);
match ctx.rule_set.zone_scope {
ZoneScope::TechOnly if in_title_zone => continue,
ZoneScope::AfterAnchor if in_title_zone => continue,
_ => {}
}
}
if matched_ranges
.iter()
.any(|(s, e)| win_start < *e && win_end > *s)
{
continue;
}
let compound = if window_size == 1 {
ctx.tokens[i].text.clone()
} else {
ctx.input[win_start..win_end].to_string()
};
if let Some(token_match) = ctx.rule_set.match_token(&compound) {
let last_idx = i + window_size - 1;
if let Some(ref blocked) = token_match.not_before
&& last_idx + 1 < ctx.tokens.len()
&& blocked
.iter()
.any(|b| b.as_str() == ctx.tokens[last_idx + 1].lower())
{
continue;
}
if let Some(ref blocked) = token_match.not_after
&& i > 0
&& blocked
.iter()
.any(|b| b.as_str() == ctx.tokens[i - 1].lower())
{
continue;
}
if let Some(ref required) = token_match.requires_after {
let ok = last_idx + 1 < ctx.tokens.len()
&& required
.iter()
.any(|r| r.as_str() == ctx.tokens[last_idx + 1].lower());
if !ok {
continue;
}
}
if token_match.requires_context && !ctx.zone_map.has_anchors {
if let Some(ref required) = token_match.requires_before {
let ok = i > 0
&& required
.iter()
.any(|r| r.as_str() == ctx.tokens[i - 1].lower());
if !ok {
continue;
}
} else {
continue;
}
} else if !token_match.requires_context {
if let Some(ref required) = token_match.requires_before {
let ok = i > 0
&& required
.iter()
.any(|r| r.as_str() == ctx.tokens[i - 1].lower());
if !ok {
continue;
}
}
}
let mut reclaimable = token_match.reclaimable;
if let Some(ref nearby) = token_match.requires_nearby {
let nearby_found = ctx
.tokens
.iter()
.any(|t| nearby.iter().any(|n| n.as_str() == t.lower()));
if !nearby_found {
reclaimable = true;
}
}
let span = MatchSpan::new(win_start, win_end, ctx.property, token_match.value)
.with_priority(ctx.priority);
let span = if reclaimable {
span.with_reclaimable()
} else {
span
};
matches.push(span);
matched_ranges.push((win_start, win_end));
for se in &token_match.side_effects {
if let Some(se_prop) = Property::from_name(&se.property) {
matches.push(
MatchSpan::new(win_start, win_end, se_prop, &se.value)
.with_priority(ctx.priority),
);
}
}
}
}
}
}
/// Computes the priority a rule carries within a given segment.
///
/// Directory segments (`is_dir == true`) get `crate::priority::DIR_PENALTY` added to
/// `rule_priority`; any other segment keeps `rule_priority` unchanged.
pub(super) fn effective_priority_for_segment(rule_priority: i32, is_dir: bool) -> i32 {
    let adjustment = match is_dir {
        true => crate::priority::DIR_PENALTY,
        false => 0,
    };
    rule_priority + adjustment
}
/// Looks up the zone recorded for segment `seg_idx`.
///
/// Returns the first entry of `dir_zones` whose `segment_idx` equals `seg_idx`, or `None`
/// when no entry matches (including when `dir_zones` is empty).
pub(super) fn find_dir_zone_for_segment(
    dir_zones: &[zone_map::SegmentZone],
    seg_idx: usize,
) -> Option<&zone_map::SegmentZone> {
    for zone in dir_zones {
        if zone.segment_idx == seg_idx {
            return Some(zone);
        }
    }
    None
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::priority;
    use crate::zone_map::SegmentZone;

    /// Non-directory segments keep the rule priority as-is, whatever its sign.
    #[test]
    fn effective_priority_filename_unchanged() {
        assert_eq!(effective_priority_for_segment(10, false), 10);
        assert_eq!(effective_priority_for_segment(0, false), 0);
        assert_eq!(effective_priority_for_segment(-3, false), -3);
    }

    /// Directory segments add `DIR_PENALTY`; the literal pins the constant's current value.
    #[test]
    fn effective_priority_directory_adds_dir_penalty() {
        assert_eq!(
            effective_priority_for_segment(10, true),
            10 + priority::DIR_PENALTY
        );
        assert_eq!(effective_priority_for_segment(10, true), 5);
    }

    /// The penalty is applied to `HEURISTIC` as well; the literal pins the current sum.
    #[test]
    fn effective_priority_dir_with_negative_rule_priority() {
        assert_eq!(
            effective_priority_for_segment(priority::HEURISTIC, true),
            priority::HEURISTIC + priority::DIR_PENALTY
        );
        assert_eq!(
            effective_priority_for_segment(priority::HEURISTIC, true),
            -6
        );
    }

    /// Builds a zone for `segment_idx` with empty ranges and no anchors.
    fn make_zone(segment_idx: usize) -> SegmentZone {
        SegmentZone {
            segment_idx,
            title_zone: 0..0,
            tech_zone: 0..0,
            has_anchors: false,
        }
    }

    #[test]
    fn find_dir_zone_empty_list_returns_none() {
        assert!(find_dir_zone_for_segment(&[], 0).is_none());
        assert!(find_dir_zone_for_segment(&[], 42).is_none());
    }

    #[test]
    fn find_dir_zone_no_match_returns_none() {
        let zones = vec![make_zone(1), make_zone(3)];
        assert!(find_dir_zone_for_segment(&zones, 2).is_none());
    }

    #[test]
    fn find_dir_zone_single_match_returns_it() {
        let zones = vec![make_zone(2)];
        let found = find_dir_zone_for_segment(&zones, 2).expect("should find");
        assert_eq!(found.segment_idx, 2);
    }

    #[test]
    fn find_dir_zone_picks_correct_zone_among_many() {
        let zones = vec![make_zone(0), make_zone(1), make_zone(2)];
        let found = find_dir_zone_for_segment(&zones, 1).expect("should find");
        assert_eq!(
            found.segment_idx, 1,
            "must return the zone whose idx matches the query"
        );
    }

    /// With duplicate indices the first entry must be returned. Both zones share the same
    /// `segment_idx`, so identity is checked by address rather than by field value.
    #[test]
    fn find_dir_zone_first_match_wins_on_duplicates() {
        let zones = vec![make_zone(5), make_zone(5)];
        let found = find_dir_zone_for_segment(&zones, 5).expect("should find");
        assert_eq!(found.segment_idx, 5);
        assert!(
            std::ptr::eq(found, &zones[0]),
            "must return the first zone with a matching idx"
        );
        assert!(
            !std::ptr::eq(found, &zones[1]),
            "must not return a later duplicate"
        );
    }
}