scrape_core/query/
explain.rs1use super::{CompiledSelector, QueryResult, specificity::Specificity};
4use crate::dom::Document;
5
/// A single optimization suggestion attached to a selector explanation.
///
/// Each variant is rendered to user-facing text by `format_hint`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OptimizationHint {
    /// Rendered as "Selector is already optimal".
    Optimal,
    /// Suggests replacing one selector with an ID-based one.
    UseIdSelector {
        /// The selector text as currently written.
        current: String,
        /// The replacement selector being suggested.
        suggested: String,
    },
    /// The selector is considered too broad.
    TooBroad {
        /// Free-form explanation shown after "Too broad:".
        reason: String,
    },
    /// Suggests using the child combinator (`>`).
    PreferChildCombinator {
        /// Where in the selector the suggestion applies.
        at: String,
    },
    /// Suggests avoiding the universal selector (`*`).
    AvoidUniversalSelector,
    /// Suggests caching the compiled selector for reuse.
    CacheSelector,
}
33
34#[derive(Debug, Clone)]
36pub struct SelectorExplanation {
37 pub source: String,
39 pub specificity: Specificity,
41 pub description: String,
43 pub performance_notes: Vec<String>,
45 pub hints: Vec<OptimizationHint>,
47 pub estimated_matches: Option<usize>,
49}
50
51impl SelectorExplanation {
52 #[must_use]
59 pub fn analyze(selector: &CompiledSelector) -> Self {
60 let specificity = calculate_specificity(selector);
61 let description = generate_description(selector);
62 let (performance_notes, hints) = analyze_performance(selector);
63
64 Self {
65 source: selector.source().to_string(),
66 specificity,
67 description,
68 performance_notes,
69 hints,
70 estimated_matches: None,
71 }
72 }
73
74 #[must_use]
76 pub fn analyze_with_document(selector: &CompiledSelector, doc: &Document) -> Self {
77 let mut explanation = Self::analyze(selector);
78 explanation.estimated_matches = Some(count_matches(selector, doc));
79 explanation
80 }
81
82 #[must_use]
84 pub fn format(&self) -> String {
85 use std::fmt::Write;
86
87 let mut output = String::new();
88 let source = &self.source;
89 let specificity = &self.specificity;
90 let description = &self.description;
91
92 let _ = writeln!(output, "Selector: {source}");
93 let _ = writeln!(output, "Specificity: {specificity}");
94 let _ = writeln!(output, "Description: {description}");
95
96 if let Some(count) = self.estimated_matches {
97 let _ = writeln!(output, "Estimated matches: {count}");
98 }
99
100 if !self.performance_notes.is_empty() {
101 output.push_str("\nPerformance:\n");
102 for note in &self.performance_notes {
103 let _ = writeln!(output, " - {note}");
104 }
105 }
106
107 if !self.hints.is_empty() {
108 output.push_str("\nOptimization hints:\n");
109 for hint in &self.hints {
110 let hint_str = format_hint(hint);
111 let _ = writeln!(output, " - {hint_str}");
112 }
113 }
114
115 output
116 }
117}
118
119fn calculate_specificity(selector: &CompiledSelector) -> Specificity {
122 use selectors::parser::Component;
123
124 let mut ids = 0u32;
125 let mut classes = 0u32;
126 let mut elements = 0u32;
127
128 for sel in selector.selector_list().slice() {
129 for component in sel.iter_raw_parse_order_from(0) {
130 match component {
131 Component::ID(_) => ids += 1,
132 Component::Class(_)
133 | Component::AttributeInNoNamespace { .. }
134 | Component::AttributeInNoNamespaceExists { .. }
135 | Component::AttributeOther(_)
136 | Component::NonTSPseudoClass(_)
137 | Component::Negation(_)
138 | Component::Is(_)
139 | Component::Where(_)
140 | Component::Has(_) => classes += 1,
141 Component::LocalName(_) | Component::PseudoElement(_) => elements += 1,
142 _ => {}
143 }
144 }
145 }
146
147 Specificity::new(ids, classes, elements)
148}
149
150fn generate_description(selector: &CompiledSelector) -> String {
151 let source = selector.source();
152
153 if source.starts_with('#') && !source.contains(' ') && !source.contains('>') {
155 let id = source.get(1..).unwrap_or("");
156 format!("Element with ID '{id}'")
157 } else if source.starts_with('.') && !source.contains(' ') && !source.contains('>') {
158 let class = source.get(1..).unwrap_or("");
159 format!("Elements with class '{class}'")
160 } else if source.contains(' ') && !source.contains('>') {
161 "Elements matching a descendant selector".to_string()
162 } else if source.contains('>') {
163 "Elements matching a child selector".to_string()
164 } else if source.contains('+') {
165 "Elements matching an adjacent sibling selector".to_string()
166 } else if source.contains('~') {
167 "Elements matching a general sibling selector".to_string()
168 } else {
169 format!("Elements matching '{source}'")
170 }
171}
172
173fn analyze_performance(selector: &CompiledSelector) -> (Vec<String>, Vec<OptimizationHint>) {
174 let mut notes = Vec::new();
175 let mut hints = Vec::new();
176 let source = selector.source();
177
178 if source.contains('*') && !source.contains("[*") {
180 notes.push("Contains universal selector - may be slow on large documents".to_string());
181 hints.push(OptimizationHint::AvoidUniversalSelector);
182 }
183
184 let descendant_count = source.split_whitespace().count();
186 if descendant_count > 3 {
187 notes.push(format!(
188 "Deep descendant chain ({descendant_count} levels) - consider simplifying"
189 ));
190 hints.push(OptimizationHint::TooBroad {
191 reason: "Deep nesting requires traversing many ancestors".to_string(),
192 });
193 }
194
195 if source.starts_with('#') && !source.contains(' ') && !source.contains('>') {
197 notes.push("ID selector - uses fast indexed lookup".to_string());
198 hints.push(OptimizationHint::Optimal);
199 }
200
201 if source.len() > 30 || descendant_count > 2 {
203 hints.push(OptimizationHint::CacheSelector);
204 }
205
206 if source.contains(' ') && !source.contains('>') {
208 notes.push(
209 "Uses descendant combinator - child combinator (>) may be faster for direct children"
210 .to_string(),
211 );
212 }
213
214 (notes, hints)
215}
216
217fn count_matches(selector: &CompiledSelector, doc: &Document) -> usize {
218 use super::find_all_compiled;
219 find_all_compiled(doc, selector).len()
220}
221
222fn format_hint(hint: &OptimizationHint) -> String {
223 match hint {
224 OptimizationHint::Optimal => "Selector is already optimal".to_string(),
225 OptimizationHint::UseIdSelector { current, suggested } => {
226 format!("Consider ID selector: '{current}' -> '{suggested}'")
227 }
228 OptimizationHint::TooBroad { reason } => format!("Too broad: {reason}"),
229 OptimizationHint::PreferChildCombinator { at } => {
230 format!("Consider child combinator (>) at: {at}")
231 }
232 OptimizationHint::AvoidUniversalSelector => {
233 "Avoid universal selector (*) for better performance".to_string()
234 }
235 OptimizationHint::CacheSelector => {
236 "Consider caching this compiled selector for reuse".to_string()
237 }
238 }
239}
240
241pub fn explain(selector: &str) -> QueryResult<SelectorExplanation> {
247 let compiled = CompiledSelector::compile(selector)?;
248 Ok(SelectorExplanation::analyze(&compiled))
249}
250
251pub fn explain_with_document(selector: &str, doc: &Document) -> QueryResult<SelectorExplanation> {
257 let compiled = CompiledSelector::compile(selector)?;
258 Ok(SelectorExplanation::analyze_with_document(&compiled, doc))
259}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// Explains a selector that is expected to compile.
    fn explain_ok(selector: &str) -> SelectorExplanation {
        explain(selector).unwrap()
    }

    #[test]
    fn test_specificity_calculation() {
        let compiled = CompiledSelector::compile("#id .class tag").unwrap();
        let specificity = SelectorExplanation::analyze(&compiled).specificity;
        assert_eq!(specificity.ids, 1);
        assert_eq!(specificity.classes, 1);
        assert_eq!(specificity.elements, 1);
    }

    #[test]
    fn test_explain_performance_hint_universal() {
        let hints = explain_ok("*").hints;
        assert!(hints.contains(&OptimizationHint::AvoidUniversalSelector));
    }

    #[test]
    fn test_explain_id_selector_optimal() {
        let hints = explain_ok("#myid").hints;
        assert!(hints.contains(&OptimizationHint::Optimal));
    }

    #[test]
    fn test_explain_deep_nesting() {
        let hints = explain_ok("div span p a").hints;
        assert!(hints.iter().any(|h| matches!(h, OptimizationHint::TooBroad { .. })));
    }

    #[test]
    fn test_explain_cache_suggestion() {
        let hints = explain_ok("div.container > ul.list > li.item:first-child").hints;
        assert!(hints.contains(&OptimizationHint::CacheSelector));
    }

    #[test]
    fn test_description_generation() {
        // (selector, word its description must contain)
        let cases = [
            ("#test", "ID"),
            (".test", "class"),
            ("div span", "descendant"),
            ("div > span", "child"),
        ];

        for (selector, expected_word) in cases {
            let description = explain_ok(selector).description;
            assert!(description.contains(expected_word));
        }
    }

    #[test]
    fn test_format_output() {
        let formatted = explain_ok("div.test").format();

        for label in ["Selector:", "Specificity:", "Description:"] {
            assert!(formatted.contains(label));
        }
    }

    #[test]
    fn test_explain_invalid_selector() {
        assert!(explain(":::").is_err());
    }

    #[test]
    fn test_explain_under_1ms() {
        use std::time::Instant;

        const CALLS: u128 = 1000;

        let started = Instant::now();
        for _ in 0..CALLS {
            let _ = explain("div.container > ul.list > li.item:first-child");
        }
        // Average microseconds per call over the whole batch.
        let average_micros = started.elapsed().as_micros() / CALLS;

        assert!(
            average_micros < 1000,
            "explain() should be <1ms per call, got {}μs average",
            average_micros
        );
    }
}