Skip to main content

scrape_core/query/
explain.rs

1//! Selector explanation and performance analysis.
2
3use super::{CompiledSelector, QueryResult, specificity::Specificity};
4use crate::dom::Document;
5
/// A single piece of optimization advice produced while analyzing a selector.
///
/// Unit variants carry no extra data; struct variants hold the text needed
/// to render the advice for a reader.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum OptimizationHint {
    /// Nothing to improve: the selector is already in its best form.
    Optimal,
    /// An ID-based selector would perform better than the current one.
    UseIdSelector {
        /// The selector pattern as currently written.
        current: String,
        /// The replacement selector being proposed.
        suggested: String,
    },
    /// The selector matches too widely and should be narrowed.
    TooBroad {
        /// Explanation of why the selector counts as too broad.
        reason: String,
    },
    /// A descendant combinator could be replaced by the cheaper child combinator.
    PreferChildCombinator {
        /// Where in the selector the descendant combinator occurs.
        at: String,
    },
    /// The universal selector (`*`) is present and should be avoided.
    AvoidUniversalSelector,
    /// The compiled selector is worth caching for reuse.
    CacheSelector,
}
33
34/// Detailed explanation of a CSS selector.
35#[derive(Debug, Clone)]
36pub struct SelectorExplanation {
37    /// The original selector string.
38    pub source: String,
39    /// Parsed specificity.
40    pub specificity: Specificity,
41    /// Human-readable description of what the selector matches.
42    pub description: String,
43    /// Performance characteristics.
44    pub performance_notes: Vec<String>,
45    /// Optimization suggestions.
46    pub hints: Vec<OptimizationHint>,
47    /// Estimated match count (if document provided).
48    pub estimated_matches: Option<usize>,
49}
50
51impl SelectorExplanation {
52    /// Creates a new selector explanation by analyzing the selector.
53    ///
54    /// # Performance
55    ///
56    /// This function is O(n) where n is the selector length.
57    /// Target: <1ms for typical selectors.
58    #[must_use]
59    pub fn analyze(selector: &CompiledSelector) -> Self {
60        let specificity = calculate_specificity(selector);
61        let description = generate_description(selector);
62        let (performance_notes, hints) = analyze_performance(selector);
63
64        Self {
65            source: selector.source().to_string(),
66            specificity,
67            description,
68            performance_notes,
69            hints,
70            estimated_matches: None,
71        }
72    }
73
74    /// Creates explanation with estimated match count from a document.
75    #[must_use]
76    pub fn analyze_with_document(selector: &CompiledSelector, doc: &Document) -> Self {
77        let mut explanation = Self::analyze(selector);
78        explanation.estimated_matches = Some(count_matches(selector, doc));
79        explanation
80    }
81
82    /// Formats the explanation for human-readable output.
83    #[must_use]
84    pub fn format(&self) -> String {
85        use std::fmt::Write;
86
87        let mut output = String::new();
88        let source = &self.source;
89        let specificity = &self.specificity;
90        let description = &self.description;
91
92        let _ = writeln!(output, "Selector: {source}");
93        let _ = writeln!(output, "Specificity: {specificity}");
94        let _ = writeln!(output, "Description: {description}");
95
96        if let Some(count) = self.estimated_matches {
97            let _ = writeln!(output, "Estimated matches: {count}");
98        }
99
100        if !self.performance_notes.is_empty() {
101            output.push_str("\nPerformance:\n");
102            for note in &self.performance_notes {
103                let _ = writeln!(output, "  - {note}");
104            }
105        }
106
107        if !self.hints.is_empty() {
108            output.push_str("\nOptimization hints:\n");
109            for hint in &self.hints {
110                let hint_str = format_hint(hint);
111                let _ = writeln!(output, "  - {hint_str}");
112            }
113        }
114
115        output
116    }
117}
118
119// Internal functions for analysis
120
121fn calculate_specificity(selector: &CompiledSelector) -> Specificity {
122    use selectors::parser::Component;
123
124    let mut ids = 0u32;
125    let mut classes = 0u32;
126    let mut elements = 0u32;
127
128    for sel in selector.selector_list().slice() {
129        for component in sel.iter_raw_parse_order_from(0) {
130            match component {
131                Component::ID(_) => ids += 1,
132                Component::Class(_)
133                | Component::AttributeInNoNamespace { .. }
134                | Component::AttributeInNoNamespaceExists { .. }
135                | Component::AttributeOther(_)
136                | Component::NonTSPseudoClass(_)
137                | Component::Negation(_)
138                | Component::Is(_)
139                | Component::Where(_)
140                | Component::Has(_) => classes += 1,
141                Component::LocalName(_) | Component::PseudoElement(_) => elements += 1,
142                _ => {}
143            }
144        }
145    }
146
147    Specificity::new(ids, classes, elements)
148}
149
150fn generate_description(selector: &CompiledSelector) -> String {
151    let source = selector.source();
152
153    // Simple heuristics for common patterns
154    if source.starts_with('#') && !source.contains(' ') && !source.contains('>') {
155        let id = source.get(1..).unwrap_or("");
156        format!("Element with ID '{id}'")
157    } else if source.starts_with('.') && !source.contains(' ') && !source.contains('>') {
158        let class = source.get(1..).unwrap_or("");
159        format!("Elements with class '{class}'")
160    } else if source.contains(' ') && !source.contains('>') {
161        "Elements matching a descendant selector".to_string()
162    } else if source.contains('>') {
163        "Elements matching a child selector".to_string()
164    } else if source.contains('+') {
165        "Elements matching an adjacent sibling selector".to_string()
166    } else if source.contains('~') {
167        "Elements matching a general sibling selector".to_string()
168    } else {
169        format!("Elements matching '{source}'")
170    }
171}
172
173fn analyze_performance(selector: &CompiledSelector) -> (Vec<String>, Vec<OptimizationHint>) {
174    let mut notes = Vec::new();
175    let mut hints = Vec::new();
176    let source = selector.source();
177
178    // Check for universal selector
179    if source.contains('*') && !source.contains("[*") {
180        notes.push("Contains universal selector - may be slow on large documents".to_string());
181        hints.push(OptimizationHint::AvoidUniversalSelector);
182    }
183
184    // Check for deep descendant selectors
185    let descendant_count = source.split_whitespace().count();
186    if descendant_count > 3 {
187        notes.push(format!(
188            "Deep descendant chain ({descendant_count} levels) - consider simplifying"
189        ));
190        hints.push(OptimizationHint::TooBroad {
191            reason: "Deep nesting requires traversing many ancestors".to_string(),
192        });
193    }
194
195    // Check for ID selector (fast path)
196    if source.starts_with('#') && !source.contains(' ') && !source.contains('>') {
197        notes.push("ID selector - uses fast indexed lookup".to_string());
198        hints.push(OptimizationHint::Optimal);
199    }
200
201    // Suggest caching for complex selectors
202    if source.len() > 30 || descendant_count > 2 {
203        hints.push(OptimizationHint::CacheSelector);
204    }
205
206    // Check for descendant vs child combinator
207    if source.contains(' ') && !source.contains('>') {
208        notes.push(
209            "Uses descendant combinator - child combinator (>) may be faster for direct children"
210                .to_string(),
211        );
212    }
213
214    (notes, hints)
215}
216
217fn count_matches(selector: &CompiledSelector, doc: &Document) -> usize {
218    use super::find_all_compiled;
219    find_all_compiled(doc, selector).len()
220}
221
222fn format_hint(hint: &OptimizationHint) -> String {
223    match hint {
224        OptimizationHint::Optimal => "Selector is already optimal".to_string(),
225        OptimizationHint::UseIdSelector { current, suggested } => {
226            format!("Consider ID selector: '{current}' -> '{suggested}'")
227        }
228        OptimizationHint::TooBroad { reason } => format!("Too broad: {reason}"),
229        OptimizationHint::PreferChildCombinator { at } => {
230            format!("Consider child combinator (>) at: {at}")
231        }
232        OptimizationHint::AvoidUniversalSelector => {
233            "Avoid universal selector (*) for better performance".to_string()
234        }
235        OptimizationHint::CacheSelector => {
236            "Consider caching this compiled selector for reuse".to_string()
237        }
238    }
239}
240
241/// Convenience function to explain a selector string.
242///
243/// # Errors
244///
245/// Returns `QueryError::InvalidSelector` if the selector is invalid.
246pub fn explain(selector: &str) -> QueryResult<SelectorExplanation> {
247    let compiled = CompiledSelector::compile(selector)?;
248    Ok(SelectorExplanation::analyze(&compiled))
249}
250
251/// Explains a selector with match count from a document.
252///
253/// # Errors
254///
255/// Returns `QueryError::InvalidSelector` if the selector is invalid.
256pub fn explain_with_document(selector: &str, doc: &Document) -> QueryResult<SelectorExplanation> {
257    let compiled = CompiledSelector::compile(selector)?;
258    Ok(SelectorExplanation::analyze_with_document(&compiled, doc))
259}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// Selector reused by the caching and timing tests.
    const COMPLEX_SELECTOR: &str = "div.container > ul.list > li.item:first-child";

    /// Explains `selector`, panicking if it fails to compile.
    fn explained(selector: &str) -> SelectorExplanation {
        explain(selector).unwrap()
    }

    #[test]
    fn test_specificity_calculation() {
        let compiled = CompiledSelector::compile("#id .class tag").unwrap();
        let specificity = SelectorExplanation::analyze(&compiled).specificity;
        assert_eq!(specificity.ids, 1);
        assert_eq!(specificity.classes, 1);
        assert_eq!(specificity.elements, 1);
    }

    #[test]
    fn test_explain_performance_hint_universal() {
        let hints = explained("*").hints;
        assert!(hints.contains(&OptimizationHint::AvoidUniversalSelector));
    }

    #[test]
    fn test_explain_id_selector_optimal() {
        let hints = explained("#myid").hints;
        assert!(hints.contains(&OptimizationHint::Optimal));
    }

    #[test]
    fn test_explain_deep_nesting() {
        let hints = explained("div span p a").hints;
        let flagged_too_broad = hints
            .iter()
            .any(|hint| matches!(hint, OptimizationHint::TooBroad { .. }));
        assert!(flagged_too_broad);
    }

    #[test]
    fn test_explain_cache_suggestion() {
        let hints = explained(COMPLEX_SELECTOR).hints;
        assert!(hints.contains(&OptimizationHint::CacheSelector));
    }

    #[test]
    fn test_description_generation() {
        // Each selector's description must mention the paired keyword.
        let cases = [
            ("#test", "ID"),
            (".test", "class"),
            ("div span", "descendant"),
            ("div > span", "child"),
        ];

        for (selector, keyword) in cases {
            let description = explained(selector).description;
            assert!(description.contains(keyword));
        }
    }

    #[test]
    fn test_format_output() {
        let formatted = explained("div.test").format();

        for label in ["Selector:", "Specificity:", "Description:"] {
            assert!(formatted.contains(label));
        }
    }

    #[test]
    fn test_explain_invalid_selector() {
        assert!(explain(":::").is_err());
    }

    #[test]
    fn test_explain_under_1ms() {
        use std::time::Instant;

        let started = Instant::now();
        for _ in 0..1000 {
            let _ = explain(COMPLEX_SELECTOR);
        }
        // Total microseconds over 1000 calls, divided down to a per-call average.
        let average_micros = started.elapsed().as_micros() / 1000;

        assert!(
            average_micros < 1000,
            "explain() should be <1ms per call, got {}μs average",
            average_micros
        );
    }
}