Skip to main content

headson/
grep.rs

1use anyhow::Result;
2use regex::{Regex, RegexBuilder};
3
4use crate::order::{
5    NodeId, ObjectType, PriorityOrder, ROOT_PQ_ID, RankedNode,
6};
7
8#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
9pub enum GrepShow {
10    #[default]
11    Matching,
12    All,
13}
14
15/// Pattern configuration for grep, designed to make invalid states unrepresentable.
16/// The highlight regex is always derivable from the pattern state.
17#[derive(Default)]
18pub enum GrepPatterns {
19    /// No grep patterns configured
20    #[default]
21    None,
22    /// Only strong (filtering) pattern
23    StrongOnly(Regex),
24    /// Only weak (highlighting) pattern
25    WeakOnly(Regex),
26    /// Both strong and weak patterns, with precomputed combined highlight regex
27    Both {
28        strong: Regex,
29        weak: Regex,
30        highlight: Regex,
31    },
32}
33
34impl GrepPatterns {
35    /// Returns the strong (filtering) regex if configured.
36    pub fn strong(&self) -> Option<&Regex> {
37        match self {
38            Self::StrongOnly(r) | Self::Both { strong: r, .. } => Some(r),
39            _ => None,
40        }
41    }
42
43    /// Returns the weak (highlight-only) regex if configured.
44    pub fn weak(&self) -> Option<&Regex> {
45        match self {
46            Self::WeakOnly(r) | Self::Both { weak: r, .. } => Some(r),
47            _ => None,
48        }
49    }
50
51    /// Returns the highlight regex (strong | weak combined).
52    /// No cloning needed - returns a reference.
53    pub fn highlight(&self) -> Option<&Regex> {
54        match self {
55            Self::None => None,
56            Self::StrongOnly(r) | Self::WeakOnly(r) => Some(r),
57            Self::Both { highlight, .. } => Some(highlight),
58        }
59    }
60
61    /// Returns true if a strong (filtering) pattern is configured.
62    pub fn has_strong(&self) -> bool {
63        matches!(self, Self::StrongOnly(_) | Self::Both { .. })
64    }
65
66    /// Returns true if any pattern (strong or weak) is configured.
67    pub fn is_active(&self) -> bool {
68        !matches!(self, Self::None)
69    }
70}
71
72/// Grep configuration threaded through the pipeline.
73#[derive(Default)]
74pub struct GrepConfig {
75    pub patterns: GrepPatterns,
76    pub show: GrepShow,
77}
78
79impl GrepConfig {
80    pub fn has_strong(&self) -> bool {
81        self.patterns.has_strong()
82    }
83}
84
85fn build_regex(pat: &str, case_insensitive: bool) -> Result<Regex> {
86    Ok(RegexBuilder::new(pat)
87        .unicode(true)
88        .case_insensitive(case_insensitive)
89        .build()?)
90}
91
92/// Combine multiple patterns into a single regex string.
93/// Case-sensitive patterns are wrapped in `(?:...)`, case-insensitive in `(?i:...)`.
94/// This prevents inline flags from leaking between patterns when joined with `|`.
95/// Returns `None` if no patterns are provided.
96pub fn combine_patterns(
97    case_sensitive: &[impl AsRef<str>],
98    case_insensitive: &[impl AsRef<str>],
99) -> Option<String> {
100    let mut parts: Vec<String> = Vec::new();
101
102    for pat in case_sensitive {
103        parts.push(format!("(?:{})", pat.as_ref()));
104    }
105    for pat in case_insensitive {
106        parts.push(format!("(?i:{})", pat.as_ref()));
107    }
108
109    if parts.is_empty() {
110        None
111    } else {
112        Some(parts.join("|"))
113    }
114}
115
116/// Build a GrepConfig from pattern slices.
117/// Combines case-sensitive and case-insensitive patterns with OR semantics.
118pub fn build_grep_config_from_patterns(
119    strong: &[impl AsRef<str>],
120    strong_icase: &[impl AsRef<str>],
121    weak: &[impl AsRef<str>],
122    weak_icase: &[impl AsRef<str>],
123    grep_show: GrepShow,
124) -> Result<GrepConfig> {
125    let strong_combined = combine_patterns(strong, strong_icase);
126    let weak_combined = combine_patterns(weak, weak_icase);
127    build_grep_config(
128        strong_combined.as_deref(),
129        weak_combined.as_deref(),
130        grep_show,
131        false, // case-insensitivity already embedded via (?i:...)
132    )
133}
134
135/// Build a GrepConfig from optional pattern strings.
136/// For simple cases with single patterns. Use `build_grep_config_from_patterns`
137/// for multiple patterns with mixed case-sensitivity.
138pub fn build_grep_config(
139    grep: Option<&str>,
140    weak_grep: Option<&str>,
141    grep_show: GrepShow,
142    case_insensitive: bool,
143) -> Result<GrepConfig> {
144    let patterns = match (grep, weak_grep) {
145        (Some(s), Some(w)) => {
146            let strong = build_regex(s, case_insensitive)?;
147            let weak = build_regex(w, case_insensitive)?;
148            // Combine patterns without redundant parens - | has lowest precedence
149            let combined = format!("{}|{}", strong.as_str(), weak.as_str());
150            let highlight = build_regex(&combined, case_insensitive)?;
151            GrepPatterns::Both {
152                strong,
153                weak,
154                highlight,
155            }
156        }
157        (Some(s), None) => {
158            GrepPatterns::StrongOnly(build_regex(s, case_insensitive)?)
159        }
160        (None, Some(w)) => {
161            GrepPatterns::WeakOnly(build_regex(w, case_insensitive)?)
162        }
163        (None, None) => GrepPatterns::None,
164    };
165
166    Ok(GrepConfig {
167        patterns,
168        show: grep_show,
169    })
170}
171
172/// Grep matching state computed from a priority order.
173/// - `matched_nodes`: nodes matching any grep pattern (strong OR weak), used for priority boosting
174/// - `guaranteed_nodes`: nodes matching strong patterns only, must be included in output
175/// - `guaranteed_count`: count of nodes in `guaranteed_nodes` (used for filtering decisions)
176pub(crate) struct GrepState {
177    pub matched_nodes: Vec<bool>,
178    pub guaranteed_nodes: Vec<bool>,
179    pub guaranteed_count: usize,
180}
181
182fn matches_ranked(
183    order: &PriorityOrder,
184    idx: usize,
185    node: &RankedNode,
186    re: &Regex,
187) -> bool {
188    let value_match = match node {
189        RankedNode::SplittableLeaf { value, .. } => re.is_match(value),
190        RankedNode::AtomicLeaf { token, .. } => re.is_match(token),
191        _ => false,
192    };
193    if value_match {
194        return true;
195    }
196    let key_match = node.key_in_object().is_some_and(|k| re.is_match(k));
197    if !key_match {
198        return false;
199    }
200    let is_fileset_child = order
201        .object_type
202        .get(ROOT_PQ_ID)
203        .is_some_and(|t| *t == ObjectType::Fileset)
204        && order
205            .parent
206            .get(idx)
207            .and_then(|p| *p)
208            .is_some_and(|p| p.0 == ROOT_PQ_ID);
209    !is_fileset_child
210}
211
212fn mark_matches_and_ancestors(
213    order: &PriorityOrder,
214    re: &Regex,
215    flags: &mut [bool],
216) {
217    for (idx, node) in order.nodes.iter().enumerate() {
218        if !matches_ranked(order, idx, node, re) {
219            continue;
220        }
221        let mut cursor = Some(NodeId(idx));
222        while let Some(node_id) = cursor {
223            let raw = node_id.0;
224            if flags[raw] {
225                break;
226            }
227            flags[raw] = true;
228            cursor = order.parent.get(raw).and_then(|p| *p);
229        }
230    }
231}
232
233/// Compute grep state by scanning the tree.
234/// Returns `None` if no grep patterns are configured.
235/// Returns `Some(GrepState)` if grep is active, even with zero matches.
236/// This makes the semantics clear: `None` = grep disabled, `Some` = grep enabled.
237pub(crate) fn compute_grep_state(
238    order: &PriorityOrder,
239    grep: &GrepConfig,
240) -> Option<GrepState> {
241    if !grep.patterns.is_active() {
242        return None;
243    }
244
245    let mut guaranteed_nodes = vec![false; order.total_nodes];
246
247    // Compute guaranteed (strong) matches once
248    if let Some(re) = grep.patterns.strong() {
249        mark_matches_and_ancestors(order, re, &mut guaranteed_nodes);
250    }
251
252    // matched_nodes = guaranteed_nodes OR weak matches
253    let mut matched_nodes = guaranteed_nodes.clone();
254    if let Some(re) = grep.patterns.weak() {
255        mark_matches_and_ancestors(order, re, &mut matched_nodes);
256    }
257
258    let guaranteed_count = guaranteed_nodes.iter().filter(|b| **b).count();
259
260    Some(GrepState {
261        matched_nodes,
262        guaranteed_nodes,
263        guaranteed_count,
264    })
265}
266
267/// Reorder priority so grep-matched nodes are visited first, preserving the
268/// existing relative order within each bucket.
269pub(crate) fn reorder_priority_for_grep(
270    order: &mut PriorityOrder,
271    matched_nodes: &[bool],
272) {
273    let mut seen = vec![false; order.total_nodes];
274    let mut reordered: Vec<NodeId> = Vec::with_capacity(order.total_nodes);
275    for &id in order.by_priority.iter() {
276        let idx = id.0;
277        if matched_nodes.get(idx).copied().unwrap_or(false) && !seen[idx] {
278            reordered.push(id);
279            seen[idx] = true;
280        }
281    }
282
283    for &id in order.by_priority.iter() {
284        let idx = id.0;
285        if !seen[idx] {
286            reordered.push(id);
287            seen[idx] = true;
288        }
289    }
290    order.by_priority = reordered;
291}