// htmls/interpreter/set.rs

1use std::collections::HashSet;
2
3use crate::Node;
4use crate::SetOperationNode;
5
6use super::Visitor;
7use super::error::InterpreterError;
8use super::{Interpreter, InterpreterResult, SelectionResult};
9use super::result::NodeHandle;
10
11/// Apply set operation
12pub fn apply_set_operation(it: &mut Interpreter, node: &SetOperationNode) -> InterpreterResult<()> {
13    match node {
14        SetOperationNode::Union(left, right) => union_operation(it, left, right)?,
15        SetOperationNode::Intersection(left, right) => intersection_operation(it, left, right)?,
16        SetOperationNode::Difference(left, right) => difference_operation(it, left, right)?,
17    }
18
19    Ok(())
20}
21
22/// Set operation result type
23enum OperationResults {
24    Nodes(Vec<NodeHandle>, Vec<NodeHandle>),
25    Texts(Vec<String>, Vec<String>),
26}
27
28/// Execute both sides of the node and return results
29fn execute_sides(
30    it: &Interpreter,
31    left: &Box<Node>,
32    right: &Box<Node>,
33    op_name: &str,
34) -> InterpreterResult<OperationResults> {
35    // Execute left and right nodes, using clone but optimized to only copy necessary parts
36    let mut it_left = it.clone();
37    let mut it_right = it.clone();
38
39    it_left.visit_node(left)?;
40    it_right.visit_node(right)?;
41
42    // Get results from both sides
43    let left_results = it_left.result;
44    let right_results = it_right.result;
45
46    // Ensure both sides have consistent result types
47    if left_results.is_nodes() != right_results.is_nodes() {
48        return Err(InterpreterError::execution_error(
49            &format!("{} operation has inconsistent result types: left side is {}, right side is {}",
50                op_name,
51                if left_results.is_nodes() { "nodes" } else { "texts" },
52                if right_results.is_nodes() { "nodes" } else { "texts" }
53            )
54        ));
55    }
56
57    // Return different result sets based on the result type
58    if left_results.is_nodes() {
59        let left_nodes = left_results.nodes()?.clone();
60        let right_nodes = right_results.nodes()?.clone();
61        Ok(OperationResults::Nodes(left_nodes, right_nodes))
62    } else {
63        let left_texts = left_results.texts()?.clone();
64        let right_texts = right_results.texts()?.clone();
65        Ok(OperationResults::Texts(left_texts, right_texts))
66    }
67}
68
69/// Union operation
70fn union_operation(
71    it: &mut Interpreter,
72    left: &Box<Node>,
73    right: &Box<Node>,
74) -> InterpreterResult<()> {
75    // Execute both sides of the node and get results
76    match execute_sides(it, left, right, "union")? {
77        OperationResults::Nodes(left_nodes, right_nodes) => {
78            // Estimate result size to optimize memory allocation
79            let estimated_size = left_nodes.len() + right_nodes.len();
80            
81            // Use hash set for deduplication
82            let mut seen_ids = HashSet::with_capacity(estimated_size);
83            let mut result = Vec::with_capacity(estimated_size);
84
85            // Add left side results
86            for node in left_nodes {
87                let id = node.id().to_string();
88                if seen_ids.insert(id) {
89                    result.push(node);
90                }
91            }
92
93            // Add unseen right side results
94            for node in right_nodes {
95                let id = node.id().to_string();
96                if seen_ids.insert(id) {
97                    result.push(node);
98                }
99            }
100
101            it.result = SelectionResult::with_nodes(result);
102        },
103        OperationResults::Texts(left_texts, right_texts) => {
104            // Estimate result size
105            let estimated_size = left_texts.len() + right_texts.len();
106            
107            // Use hash set for deduplication
108            let mut seen_texts = HashSet::with_capacity(estimated_size);
109            let mut result = Vec::with_capacity(estimated_size);
110
111            // Add left side texts
112            for text in left_texts {
113                if seen_texts.insert(text.clone()) {
114                    result.push(text);
115                }
116            }
117
118            // Add unseen right side texts
119            for text in right_texts {
120                if seen_texts.insert(text.clone()) {
121                    result.push(text);
122                }
123            }
124
125            it.result = SelectionResult::with_texts(result);
126        }
127    }
128
129    Ok(())
130}
131
132/// Intersection operation
133fn intersection_operation(
134    it: &mut Interpreter,
135    left: &Box<Node>,
136    right: &Box<Node>,
137) -> InterpreterResult<()> {
138    // Execute both sides of the node and get results
139    match execute_sides(it, left, right, "intersection")? {
140        OperationResults::Nodes(left_nodes, right_nodes) => {
141            // Create hash set of left side node IDs, pre-allocate capacity
142            let left_ids: HashSet<String> = left_nodes
143                .iter()
144                .map(|node| node.id().to_string())
145                .collect();
146
147            // Estimate result capacity (worst case is all from right side)
148            let mut node_result = Vec::with_capacity(right_nodes.len());
149            
150            // Filter right side nodes, only keep nodes with IDs appearing on the left side
151            for node in right_nodes {
152                let node_id = node.id().to_string();
153                if left_ids.contains(&node_id) {
154                    node_result.push(node);
155                }
156            }
157
158            it.result = SelectionResult::with_nodes(node_result);
159        },
160        OperationResults::Texts(left_texts, right_texts) => {
161            // Create hash set of left side texts
162            let left_text_set: HashSet<String> = left_texts.into_iter().collect();
163            
164            // Estimate result capacity
165            let mut text_result = Vec::with_capacity(right_texts.len());
166            
167            // Filter right side texts, only keep texts appearing on the left side
168            for text in right_texts {
169                if left_text_set.contains(&text) {
170                    text_result.push(text);
171                }
172            }
173
174            it.result = SelectionResult::with_texts(text_result);
175        }
176    }
177
178    Ok(())
179}
180
181/// Difference operation
182fn difference_operation(
183    it: &mut Interpreter,
184    left: &Box<Node>,
185    right: &Box<Node>,
186) -> InterpreterResult<()> {
187    // Execute both sides of the node and get results
188    match execute_sides(it, left, right, "difference")? {
189        OperationResults::Nodes(left_nodes, right_nodes) => {
190            // Create hash set of right side node IDs, pre-allocate capacity
191            let right_ids: HashSet<String> = right_nodes
192                .iter()
193                .map(|node| node.id().to_string())
194                .collect();
195
196            // Estimate result capacity (worst case is all from left side)
197            let mut node_result = Vec::with_capacity(left_nodes.len());
198            
199            // Filter left side nodes, exclude nodes with IDs appearing on the right side
200            for node in left_nodes {
201                let node_id = node.id().to_string();
202                if !right_ids.contains(&node_id) {
203                    node_result.push(node);
204                }
205            }
206
207            it.result = SelectionResult::with_nodes(node_result);
208        },
209        OperationResults::Texts(left_texts, right_texts) => {
210            // Create hash set of right side texts
211            let right_text_set: HashSet<String> = right_texts.into_iter().collect();
212            
213            // Estimate result capacity
214            let mut text_result = Vec::with_capacity(left_texts.len());
215            
216            // Filter left side texts, exclude texts appearing on the right side
217            for text in left_texts {
218                if !right_text_set.contains(&text) {
219                    text_result.push(text);
220                }
221            }
222
223            it.result = SelectionResult::with_texts(text_result);
224        }
225    }
226
227    Ok(())
228}
229