// pdf_ast/transform/mod.rs

1use crate::ast::{AstError, AstNode, AstResult, NodeId, NodeType, PdfAstGraph};
2use crate::types::PdfValue;
3use std::collections::HashMap;
4
5pub mod builder;
6pub mod operations;
7pub mod validator;
8
9pub use builder::*;
10pub use operations::*;
11pub use validator::*;
12
13/// Trait for AST transformations
14pub trait AstTransformer {
15    /// Transform the AST
16    fn transform(&self, graph: &mut PdfAstGraph) -> AstResult<TransformResult>;
17
18    /// Get transformation description
19    fn description(&self) -> &str;
20
21    /// Check if transformation is reversible
22    fn is_reversible(&self) -> bool {
23        false
24    }
25
26    /// Get reverse transformation if available
27    fn reverse_transform(&self) -> Option<Box<dyn AstTransformer>> {
28        None
29    }
30}
31
32/// Result of a transformation
33#[derive(Debug, Clone)]
34pub struct TransformResult {
35    pub nodes_added: Vec<NodeId>,
36    pub nodes_removed: Vec<NodeId>,
37    pub nodes_modified: Vec<NodeId>,
38    pub edges_added: usize,
39    pub edges_removed: usize,
40    pub metadata: HashMap<String, String>,
41}
42
43impl Default for TransformResult {
44    fn default() -> Self {
45        Self::new()
46    }
47}
48
49impl TransformResult {
50    pub fn new() -> Self {
51        Self {
52            nodes_added: Vec::new(),
53            nodes_removed: Vec::new(),
54            nodes_modified: Vec::new(),
55            edges_added: 0,
56            edges_removed: 0,
57            metadata: HashMap::new(),
58        }
59    }
60
61    pub fn with_added_node(mut self, node_id: NodeId) -> Self {
62        self.nodes_added.push(node_id);
63        self
64    }
65
66    pub fn with_removed_node(mut self, node_id: NodeId) -> Self {
67        self.nodes_removed.push(node_id);
68        self
69    }
70
71    pub fn with_modified_node(mut self, node_id: NodeId) -> Self {
72        self.nodes_modified.push(node_id);
73        self
74    }
75}
76
77/// Extended mutation API for PdfAstGraph
78impl PdfAstGraph {
79    /// Transform the AST using a transformer
80    pub fn transform<T: AstTransformer>(&mut self, transformer: T) -> AstResult<TransformResult> {
81        transformer.transform(self)
82    }
83
84    /// Replace a node with a new one
85    pub fn replace_node(&mut self, id: NodeId, new_node: AstNode) -> AstResult<AstNode> {
86        if !self.contains_node(id) {
87            return Err(AstError::InvalidReferenceString(format!(
88                "Node {:?} not found",
89                id
90            )));
91        }
92
93        // Get the old node
94        let old_node = self
95            .get_node(id)
96            .ok_or_else(|| AstError::InvalidReferenceString(format!("Node {:?} not found", id)))?
97            .clone();
98
99        // Update the node data
100        if let Some(node_data) = self.get_node_mut(id) {
101            node_data.node_type = new_node.node_type;
102            node_data.value = new_node.value;
103            node_data.metadata = new_node.metadata;
104            // Keep existing children and references
105        }
106
107        Ok(old_node)
108    }
109
110    /// Insert a new node as child of parent
111    pub fn insert_node(&mut self, parent: NodeId, node: AstNode) -> AstResult<NodeId> {
112        if !self.contains_node(parent) {
113            return Err(AstError::InvalidReferenceString(format!(
114                "Parent node {:?} not found",
115                parent
116            )));
117        }
118
119        let new_id = self.create_node(node.node_type, node.value);
120
121        // Set metadata
122        if let Some(new_node) = self.get_node_mut(new_id) {
123            new_node.metadata = node.metadata;
124        }
125
126        // Add edge from parent to new node
127        self.add_edge(parent, new_id, crate::ast::EdgeType::Child);
128
129        Ok(new_id)
130    }
131
132    /// Remove a node and its subtree
133    pub fn remove_subtree(&mut self, root: NodeId) -> AstResult<Vec<AstNode>> {
134        let mut removed_nodes = Vec::new();
135        let mut to_remove = Vec::new();
136
137        // Collect all nodes in subtree
138        self.collect_subtree_nodes(root, &mut to_remove);
139
140        // Remove nodes and collect them
141        for node_id in to_remove {
142            if let Some(node) = self.get_node(node_id).cloned() {
143                removed_nodes.push(node);
144                self.remove_node(node_id);
145
146                // Remove all edges involving this node
147                let all_edges = self.get_all_edges();
148                for edge in all_edges {
149                    if edge.from == node_id || edge.to == node_id {
150                        self.remove_edge(edge.from, edge.to);
151                    }
152                }
153            }
154        }
155
156        Ok(removed_nodes)
157    }
158
159    /// Collect all nodes in a subtree
160    fn collect_subtree_nodes(&self, root: NodeId, result: &mut Vec<NodeId>) {
161        if result.contains(&root) {
162            return; // Avoid infinite loops
163        }
164
165        result.push(root);
166
167        if let Some(node) = self.get_node(root) {
168            for &child_id in &node.children {
169                self.collect_subtree_nodes(child_id, result);
170            }
171        }
172    }
173
174    /// Move a subtree to a new parent
175    pub fn move_subtree(&mut self, subtree_root: NodeId, new_parent: NodeId) -> AstResult<()> {
176        if !self.contains_node(subtree_root) || !self.contains_node(new_parent) {
177            return Err(AstError::InvalidReferenceString(
178                "Invalid node reference".to_string(),
179            ));
180        }
181
182        // Remove old parent-child edge
183        if let Some(old_parent) = self.find_parent(subtree_root) {
184            self.remove_edge(old_parent, subtree_root);
185        }
186
187        // Add new parent-child edge
188        self.add_edge(new_parent, subtree_root, crate::ast::EdgeType::Child);
189
190        Ok(())
191    }
192
193    /// Find parent of a node
194    pub fn find_parent(&self, node_id: NodeId) -> Option<NodeId> {
195        for edge in self.get_all_edges() {
196            let (from, to, edge_type) = (edge.from, edge.to, edge.edge_type);
197            if to == node_id && matches!(edge_type, crate::ast::EdgeType::Child) {
198                return Some(from);
199            }
200        }
201        None
202    }
203
204    /// Clone a subtree
205    pub fn clone_subtree(&mut self, root: NodeId, new_parent: NodeId) -> AstResult<NodeId> {
206        if !self.contains_node(root) || !self.contains_node(new_parent) {
207            return Err(AstError::InvalidReferenceString(
208                "Invalid node reference".to_string(),
209            ));
210        }
211
212        let mut id_mapping = HashMap::new();
213        let cloned_root = self.clone_subtree_recursive(root, &mut id_mapping)?;
214
215        // Add cloned subtree to new parent
216        self.add_edge(new_parent, cloned_root, crate::ast::EdgeType::Child);
217
218        Ok(cloned_root)
219    }
220
221    /// Recursively clone subtree nodes
222    fn clone_subtree_recursive(
223        &mut self,
224        node_id: NodeId,
225        id_mapping: &mut HashMap<NodeId, NodeId>,
226    ) -> AstResult<NodeId> {
227        if let Some(&mapped_id) = id_mapping.get(&node_id) {
228            return Ok(mapped_id);
229        }
230
231        let original_node = self
232            .get_node(node_id)
233            .ok_or_else(|| {
234                AstError::InvalidReferenceString(format!("Node {:?} not found", node_id))
235            })?
236            .clone();
237
238        // Create new node with same type and value
239        let new_id = self.create_node(original_node.node_type.clone(), original_node.value.clone());
240
241        // Copy metadata
242        if let Some(new_node) = self.get_node_mut(new_id) {
243            new_node.metadata = original_node.metadata.clone();
244        }
245
246        id_mapping.insert(node_id, new_id);
247
248        // Clone children
249        for &child_id in &original_node.children {
250            let cloned_child = self.clone_subtree_recursive(child_id, id_mapping)?;
251            self.add_edge(new_id, cloned_child, crate::ast::EdgeType::Child);
252        }
253
254        // Clone references
255        for &ref_id in &original_node.references {
256            if let Some(&cloned_ref) = id_mapping.get(&ref_id) {
257                self.add_edge(new_id, cloned_ref, crate::ast::EdgeType::Reference);
258            }
259        }
260
261        Ok(new_id)
262    }
263
264    /// Merge two nodes (combine their children and references)
265    pub fn merge_nodes(&mut self, target: NodeId, source: NodeId) -> AstResult<()> {
266        if !self.contains_node(target) || !self.contains_node(source) {
267            return Err(AstError::InvalidReferenceString(
268                "Invalid node reference".to_string(),
269            ));
270        }
271
272        if target == source {
273            return Ok(()); // Nothing to merge
274        }
275
276        // Get source node data
277        let source_node = self
278            .get_node(source)
279            .ok_or_else(|| {
280                AstError::InvalidReferenceString(format!("Source node {:?} not found", source))
281            })?
282            .clone();
283
284        // Move children from source to target
285        for &child_id in &source_node.children {
286            self.remove_edge(source, child_id);
287            self.add_edge(target, child_id, crate::ast::EdgeType::Child);
288        }
289
290        // Move references from source to target
291        for &ref_id in &source_node.references {
292            self.remove_edge(source, ref_id);
293            self.add_edge(target, ref_id, crate::ast::EdgeType::Reference);
294        }
295
296        // Remove source node
297        self.remove_subtree(source)?;
298
299        Ok(())
300    }
301
302    /// Update node value
303    pub fn update_node_value(
304        &mut self,
305        node_id: NodeId,
306        new_value: PdfValue,
307    ) -> AstResult<PdfValue> {
308        let node = self.get_node_mut(node_id).ok_or_else(|| {
309            AstError::InvalidReferenceString(format!("Node {:?} not found", node_id))
310        })?;
311
312        let old_value = std::mem::replace(&mut node.value, new_value);
313        Ok(old_value)
314    }
315
316    /// Update node type
317    pub fn update_node_type(&mut self, node_id: NodeId, new_type: NodeType) -> AstResult<NodeType> {
318        let node = self.get_node_mut(node_id).ok_or_else(|| {
319            AstError::InvalidReferenceString(format!("Node {:?} not found", node_id))
320        })?;
321
322        let old_type = std::mem::replace(&mut node.node_type, new_type);
323        Ok(old_type)
324    }
325
326    /// Batch operations
327    pub fn batch_transform<F>(&mut self, operations: F) -> AstResult<TransformResult>
328    where
329        F: FnOnce(&mut BatchOperations) -> AstResult<()>,
330    {
331        let mut batch = BatchOperations::new(self);
332        operations(&mut batch)?;
333        batch.execute()
334    }
335}
336
337/// Batch operations for efficient multiple transformations
338pub struct BatchOperations<'a> {
339    graph: &'a mut PdfAstGraph,
340    operations: Vec<Box<dyn BatchOperation>>,
341}
342
343impl<'a> BatchOperations<'a> {
344    pub fn new(graph: &'a mut PdfAstGraph) -> Self {
345        Self {
346            graph,
347            operations: Vec::new(),
348        }
349    }
350
351    pub fn add_node(&mut self, parent: NodeId, node: AstNode) -> NodeId {
352        let temp_id = NodeId(self.operations.len() + 1000000); // Temporary ID
353        self.operations.push(Box::new(AddNodeOp {
354            parent,
355            node: Some(node),
356            result_id: temp_id,
357        }));
358        temp_id
359    }
360
361    pub fn remove_node(&mut self, node_id: NodeId) {
362        self.operations.push(Box::new(RemoveNodeOp { node_id }));
363    }
364
365    pub fn update_value(&mut self, node_id: NodeId, value: PdfValue) {
366        self.operations.push(Box::new(UpdateValueOp {
367            node_id,
368            value: Some(value),
369        }));
370    }
371
372    pub fn execute(self) -> AstResult<TransformResult> {
373        let mut result = TransformResult::new();
374
375        for operation in self.operations {
376            operation.execute(self.graph, &mut result)?;
377        }
378
379        Ok(result)
380    }
381}
382
383trait BatchOperation {
384    fn execute(&self, graph: &mut PdfAstGraph, result: &mut TransformResult) -> AstResult<()>;
385}
386
387#[allow(dead_code)]
388struct AddNodeOp {
389    parent: NodeId,
390    node: Option<AstNode>,
391    result_id: NodeId,
392}
393
394impl BatchOperation for AddNodeOp {
395    fn execute(&self, graph: &mut PdfAstGraph, result: &mut TransformResult) -> AstResult<()> {
396        if let Some(node) = &self.node {
397            let new_id = graph.insert_node(self.parent, node.clone())?;
398            result.nodes_added.push(new_id);
399            result.edges_added += 1;
400        }
401        Ok(())
402    }
403}
404
405struct RemoveNodeOp {
406    node_id: NodeId,
407}
408
409impl BatchOperation for RemoveNodeOp {
410    fn execute(&self, graph: &mut PdfAstGraph, result: &mut TransformResult) -> AstResult<()> {
411        let removed = graph.remove_subtree(self.node_id)?;
412        for node in removed {
413            result.nodes_removed.push(node.id);
414        }
415        Ok(())
416    }
417}
418
419struct UpdateValueOp {
420    node_id: NodeId,
421    value: Option<PdfValue>,
422}
423
424impl BatchOperation for UpdateValueOp {
425    fn execute(&self, graph: &mut PdfAstGraph, result: &mut TransformResult) -> AstResult<()> {
426        if let Some(value) = &self.value {
427            graph.update_node_value(self.node_id, value.clone())?;
428            result.nodes_modified.push(self.node_id);
429        }
430        Ok(())
431    }
432}