1use crate::error::PoolResult;
2use crate::{node_definition::NodeTree, tree::Tree};
3
4use super::{error::error_helpers, node::Node, types::NodeId};
5use serde::{Deserialize, Serialize};
6use std::time::Instant;
7use std::{sync::Arc};
8use rayon::prelude::*;
9use std::marker::Sync;
10use std::sync::atomic::{AtomicUsize, Ordering};
11use rpds::{VectorSync};
12
13static POOL_ID_COUNTER: AtomicUsize = AtomicUsize::new(0);
15
16type NodeConditionRef<'a> = Box<dyn Fn(&Node) -> bool + Send + Sync + 'a>;
17
18#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
22pub struct NodePool {
23 inner: Arc<Tree>,
25 key: String,
27}
28
29impl NodePool {
30 #[cfg_attr(feature = "dev-tracing", tracing::instrument(skip(inner), fields(
31 crate_name = "model",
32 node_count = inner.nodes.iter().map(|i| i.values().len()).sum::<usize>()
33 )))]
34 pub fn new(inner: Arc<Tree>) -> Arc<NodePool> {
35 let id = POOL_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
36 let pool = Self { inner, key: format!("pool_{id}") };
37 let pool: Arc<NodePool> = Arc::new(pool);
38
39 pool
40 }
41
42 pub fn key(&self) -> &str {
44 &self.key
45 }
46
47 pub fn size(&self) -> usize {
49 self.inner.nodes.iter().map(|i| i.values().len()).sum()
50 }
51
52 pub fn root(&self) -> Option<&Node> {
53 self.inner.get_node(&self.inner.root_id)
54 }
55
56 pub fn root_id(&self) -> &NodeId {
57 &self.inner.root_id
58 }
59
60 pub fn get_inner(&self) -> &Arc<Tree> {
61 &self.inner
62 }
63
64 pub fn from(nodes: NodeTree) -> Arc<NodePool> {
75 let id = POOL_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
76 let pool = Self {
77 inner: Arc::new(Tree::from(nodes)),
78 key: format!("pool_{id}"),
79 };
80 let pool: Arc<NodePool> = Arc::new(pool);
81 pool
82 }
83
84 pub fn get_node(
88 &self,
89 id: &NodeId,
90 ) -> Option<&Node> {
91 self.inner.get_node(id)
92 }
93 pub fn get_parent_node(
94 &self,
95 id: &NodeId,
96 ) -> Option<&Node> {
97 self.inner.get_parent_node(id)
98 }
99
100 pub fn contains_node(
102 &self,
103 id: &NodeId,
104 ) -> bool {
105 self.inner.contains_node(id)
106 }
107
108 pub fn children(
112 &self,
113 parent_id: &NodeId,
114 ) -> Option<VectorSync<NodeId>> {
115 self.get_node(parent_id).map(|n| n.content.clone())
116 }
117
118 pub fn descendants(
120 &self,
121 parent_id: &NodeId,
122 ) -> Vec<Node> {
123 let mut result: Vec<Node> = Vec::new();
124 self._collect_descendants(parent_id, &mut result);
125 result
126 }
127
128 fn _collect_descendants(
129 &self,
130 parent_id: &NodeId,
131 result: &mut Vec<Node>,
132 ) {
133 if let Some(children) = self.children(parent_id) {
134 for child_id in &children {
135 if let Some(child) = self.get_node(child_id) {
136 result.push(child.clone());
137 self._collect_descendants(child_id, result);
138 }
139 }
140 }
141 }
142 pub fn for_each<F>(
143 &self,
144 id: &NodeId,
145 f: F,
146 ) where
147 F: Fn(&Node),
148 {
149 if let Some(children) = self.children(id) {
150 for child_id in &children {
151 if let Some(child) = self.get_node(child_id) {
152 f(&child);
153 }
154 }
155 }
156 }
157 pub fn parent_id(
159 &self,
160 child_id: &NodeId,
161 ) -> Option<&NodeId> {
162 self.inner.parent_map.get(child_id)
163 }
164
165 pub fn ancestors(
167 &self,
168 child_id: &NodeId,
169 ) -> Vec<&Node> {
170 let mut chain = Vec::new();
171 let mut current_id = child_id;
172 while let Some(parent_id) = self.parent_id(current_id) {
173 if let Some(parent) = self.get_node(parent_id) {
174 chain.push(parent);
175 current_id = parent_id;
176 } else {
177 break;
178 }
179 }
180 chain
181 }
182
183 pub fn validate_hierarchy(&self) -> PoolResult<()> {
185 for (child_id, parent_id) in &self.inner.parent_map {
186 if !self.contains_node(parent_id) {
188 return Err(error_helpers::orphan_node(child_id.clone()));
189 }
190
191 if let Some(children) = self.children(parent_id) {
193 let has = children.iter().any(|a| a.eq(child_id));
194 if !has {
195 return Err(error_helpers::invalid_parenting(
196 child_id.clone(),
197 parent_id.clone(),
198 ));
199 }
200 }
201 }
202 Ok(())
203 }
204
205 pub fn filter_nodes<P>(
208 &self,
209 predicate: P,
210 ) -> Vec<&Node>
211 where
212 P: Fn(&Node) -> bool,
213 {
214 self.get_all_nodes().into_iter().filter(|n| predicate(n)).collect()
215 }
216 pub fn find_node<P>(
218 &self,
219 predicate: P,
220 ) -> Option<&Node>
221 where
222 P: Fn(&Node) -> bool,
223 {
224 self.get_all_nodes().into_iter().find(|n| predicate(n))
225 }
226
227 pub fn get_node_depth(
237 &self,
238 node_id: &NodeId,
239 ) -> Option<usize> {
240 let mut depth = 0;
241 let mut current_id = node_id;
242
243 while let Some(parent_id) = self.parent_id(current_id) {
244 depth += 1;
245 current_id = parent_id;
246 }
247
248 Some(depth)
249 }
250
251 pub fn get_node_path(
261 &self,
262 node_id: &NodeId,
263 ) -> Vec<NodeId> {
264 let mut path = Vec::new();
265 let mut current_id = node_id;
266
267 while let Some(parent_id) = self.parent_id(current_id) {
268 path.push(current_id.clone());
269 current_id = parent_id;
270 }
271 path.push(current_id.clone());
272 path.reverse();
273
274 path
275 }
276 pub fn resolve(
278 &self,
279 node_id: &NodeId,
280 ) -> Vec<&Node> {
281 let mut result = Vec::new();
282 let mut current_id = node_id;
283
284 loop {
286 if let Some(node) = self.get_node(current_id) {
287 result.push(node);
288 }
289
290 if let Some(parent_id) = self.parent_id(current_id) {
291 current_id = parent_id;
292 } else {
293 break;
295 }
296 }
297
298 result.reverse();
300 result
301 }
302
303 pub fn is_leaf(
313 &self,
314 node_id: &NodeId,
315 ) -> bool {
316 if let Some(children) = self.children(node_id) {
317 children.is_empty()
318 } else {
319 true
320 }
321 }
322
323 pub fn get_left_siblings(
325 &self,
326 node_id: &NodeId,
327 ) -> Vec<NodeId> {
328 if let Some(parent_id) = self.parent_id(node_id) {
329 if let Some(siblings) = self.children(parent_id) {
330 if let Some(index) =
331 siblings.iter().position(|id| id == node_id)
332 {
333 return siblings.iter().take(index).cloned().collect();
334 } else {
335 eprintln!(
337 "Warning: Node {node_id:?} not found in parent's children list"
338 );
339 }
340 }
341 }
342 Vec::new()
343 }
344 pub fn get_right_siblings(
346 &self,
347 node_id: &NodeId,
348 ) -> Vec<NodeId> {
349 if let Some(parent_id) = self.parent_id(node_id) {
350 if let Some(siblings) = self.children(parent_id) {
351 if let Some(index) =
352 siblings.iter().position(|id| id == node_id)
353 {
354 return siblings.iter().skip(index + 1).cloned().collect();
355 } else {
356 eprintln!(
358 "Warning: Node {node_id:?} not found in parent's children list"
359 );
360 }
361 }
362 }
363 Vec::new()
364 }
365 pub fn get_left_nodes(
367 &self,
368 node_id: &NodeId,
369 ) -> Vec<&Node> {
370 let siblings = self.get_left_siblings(node_id);
371 let mut result = Vec::new();
372 for sibling_id in siblings {
373 if let Some(node) = self.get_node(&sibling_id) {
374 result.push(node);
375 }
376 }
377 result
378 }
379
380 pub fn get_right_nodes(
382 &self,
383 node_id: &NodeId,
384 ) -> Vec<&Node> {
385 let siblings = self.get_right_siblings(node_id);
386 let mut result = Vec::new();
387 for sibling_id in siblings {
388 if let Some(node) = self.get_node(&sibling_id) {
389 result.push(node);
390 }
391 }
392 result
393 }
394
395 pub fn get_all_siblings(
405 &self,
406 node_id: &NodeId,
407 ) -> Vec<NodeId> {
408 if let Some(parent_id) = self.parent_id(node_id) {
409 if let Some(children) = self.children(parent_id) {
410 return children.iter().cloned().collect();
411 }
412 }
413 Vec::new()
414 }
415
416 pub fn get_subtree_size(
426 &self,
427 node_id: &NodeId,
428 ) -> usize {
429 let mut size = 1; if let Some(children) = self.children(node_id) {
431 for child_id in &children {
432 size += self.get_subtree_size(child_id);
433 }
434 }
435 size
436 }
437
438 pub fn is_ancestor(
449 &self,
450 ancestor_id: &NodeId,
451 descendant_id: &NodeId,
452 ) -> bool {
453 let mut current_id = descendant_id;
454 while let Some(parent_id) = self.parent_id(current_id) {
455 if parent_id == ancestor_id {
456 return true;
457 }
458 current_id = parent_id;
459 }
460 false
461 }
462
463 pub fn get_lowest_common_ancestor(
474 &self,
475 node1_id: &NodeId,
476 node2_id: &NodeId,
477 ) -> Option<NodeId> {
478 let path1 = self.get_node_path(node1_id);
479 let path2 = self.get_node_path(node2_id);
480
481 for ancestor_id in path1.iter().rev() {
482 if path2.contains(ancestor_id) {
483 return Some(ancestor_id.clone());
484 }
485 }
486 None
487 }
488
489 pub fn parallel_query<P>(
499 &self,
500 predicate: P,
501 ) -> Vec<&Node>
502 where
503 P: Fn(&Node) -> bool + Send + Sync,
504 {
505 let shards: Vec<_> = self.inner.nodes.iter().collect();
507
508 shards
510 .into_par_iter()
511 .flat_map(|shard| {
512 shard
514 .values()
515 .filter(|node| predicate(node))
516 .collect::<Vec<_>>()
517 })
518 .collect()
519 }
520
521 fn get_all_nodes(&self) -> Vec<&Node> {
523 let mut result = Vec::new();
524 for shard in &self.inner.nodes {
525 for node in shard.values() {
526 result.push(node);
527 }
528 }
529 result
530 }
531}
532
533pub struct QueryEngine<'a> {
535 pool: &'a NodePool,
536 conditions: Vec<NodeConditionRef<'a>>,
537}
538
539impl<'a> QueryEngine<'a> {
540 pub fn new(pool: &'a NodePool) -> Self {
542 Self { pool, conditions: Vec::new() }
543 }
544
545 pub fn by_type(
547 mut self,
548 node_type: &'a str,
549 ) -> Self {
550 let node_type = node_type.to_string();
551 self.conditions.push(Box::new(move |node| node.r#type == node_type));
552 self
553 }
554
555 pub fn by_attr(
557 mut self,
558 key: &'a str,
559 value: &'a serde_json::Value,
560 ) -> Self {
561 let key = key.to_string();
562 let value = value.clone();
563 self.conditions
564 .push(Box::new(move |node| node.attrs.get(&key) == Some(&value)));
565 self
566 }
567
568 pub fn by_mark(
570 mut self,
571 mark_type: &'a str,
572 ) -> Self {
573 let mark_type = mark_type.to_string();
574 self.conditions.push(Box::new(move |node| {
575 node.marks.iter().any(|mark| mark.r#type == mark_type)
576 }));
577 self
578 }
579
580 pub fn by_child_count(
582 mut self,
583 count: usize,
584 ) -> Self {
585 self.conditions.push(Box::new(move |node| node.content.len() == count));
586 self
587 }
588
589 pub fn by_depth(
591 mut self,
592 depth: usize,
593 ) -> Self {
594 let pool = self.pool.clone();
595 self.conditions.push(Box::new(move |node| {
596 pool.get_node_depth(&node.id) == Some(depth)
597 }));
598 self
599 }
600
601 pub fn by_ancestor_type(
603 mut self,
604 ancestor_type: &'a str,
605 ) -> Self {
606 let pool = self.pool.clone();
607 let ancestor_type = ancestor_type.to_string();
608 self.conditions.push(Box::new(move |node| {
609 pool.ancestors(&node.id)
610 .iter()
611 .any(|ancestor| ancestor.r#type == ancestor_type)
612 }));
613 self
614 }
615
616 pub fn by_descendant_type(
618 mut self,
619 descendant_type: &'a str,
620 ) -> Self {
621 let pool = self.pool.clone();
622 let descendant_type = descendant_type.to_string();
623 self.conditions.push(Box::new(move |node| {
624 pool.descendants(&node.id)
625 .iter()
626 .any(|descendant| descendant.r#type == descendant_type)
627 }));
628 self
629 }
630
631 pub fn find_all(&self) -> Vec<&Node> {
633 self.pool
634 .get_all_nodes()
635 .into_iter()
636 .filter(|node| {
637 self.conditions.iter().all(|condition| condition(node))
638 })
639 .collect()
640 }
641
642 pub fn find_first(&self) -> Option<&Node> {
644 self.pool.get_all_nodes().into_iter().find(|node| {
645 self.conditions.iter().all(|condition| condition(node))
646 })
647 }
648
649 pub fn count(&self) -> usize {
651 self.pool
652 .get_all_nodes()
653 .into_iter()
654 .filter(|node| {
655 self.conditions.iter().all(|condition| condition(node))
656 })
657 .count()
658 }
659
660 pub fn parallel_find_all(&self) -> Vec<&Node> {
662 let conditions = &self.conditions;
663 self.pool.parallel_query(|node| {
664 conditions.iter().all(|condition| condition(node))
665 })
666 }
667
668 pub fn parallel_find_first(&self) -> Option<&Node> {
670 let conditions = &self.conditions;
671 self.pool.get_all_nodes().into_par_iter().find_any(move |node| {
672 conditions.iter().all(|condition| condition(node))
673 })
674 }
675
676 pub fn parallel_count(&self) -> usize {
678 let conditions = &self.conditions;
679 self.pool
680 .get_all_nodes()
681 .into_par_iter()
682 .filter(move |node| {
683 conditions.iter().all(|condition| condition(node))
684 })
685 .count()
686 }
687}
688
689impl NodePool {
690 pub fn query(&self) -> QueryEngine<'_> {
692 QueryEngine::new(self)
693 }
694}
695
696#[derive(Clone, Debug)]
698pub struct QueryCacheConfig {
699 pub capacity: usize,
701 pub enabled: bool,
703}
704
705impl Default for QueryCacheConfig {
706 fn default() -> Self {
707 Self { capacity: 1000, enabled: true }
708 }
709}
710
711#[derive(Clone, Debug)]
713pub struct LazyQueryConfig {
714 pub cache_capacity: usize,
716 pub index_cache_capacity: usize,
718 pub cache_enabled: bool,
720 pub index_build_threshold: usize,
722}
723
724impl Default for LazyQueryConfig {
725 fn default() -> Self {
726 Self {
727 cache_capacity: 1000,
728 index_cache_capacity: 100,
729 cache_enabled: true,
730 index_build_threshold: 5,
731 }
732 }
733}
734
735pub struct LazyQueryEngine<'a> {
737 pool: &'a NodePool,
738}
739
740impl<'a> LazyQueryEngine<'a> {
743 pub fn new(pool: &'a NodePool) -> Self {
744 Self { pool: pool }
745 }
746
747 pub fn by_type_lazy(
749 &'a mut self,
750 node_type: &str,
751 ) -> Vec<&'a Node> {
752 let start = Instant::now();
755 let nodes = self.build_type_index(node_type);
756 let duration = start.elapsed();
757
758 println!(
759 "实时构建类型索引 '{}', 耗时: {:?}, 节点数: {}",
760 node_type,
761 duration,
762 nodes.len()
763 );
764 nodes
765 }
766
767 pub fn by_depth_lazy(
769 &mut self,
770 depth: usize,
771 ) -> Vec<&Node> {
772 let start = Instant::now();
773 let nodes = self.build_depth_index(depth);
774 let duration = start.elapsed();
775
776 println!(
777 "实时构建深度索引 {}, 耗时: {:?}, 节点数: {}",
778 depth,
779 duration,
780 nodes.len()
781 );
782 nodes
783 }
784
785 pub fn by_mark_lazy(
787 &'a mut self,
788 mark_type: &str,
789 ) -> Vec<&'a Node> {
790 let start = Instant::now();
791 let nodes = self.build_mark_index(mark_type);
792 let duration = start.elapsed();
793 println!(
794 "实时构建标记索引 '{}', 耗时: {:?}, 节点数: {}",
795 mark_type,
796 duration,
797 nodes.len()
798 );
799 nodes
800 }
801
802 pub fn parallel_query(
804 &'a mut self,
805 conditions: &[QueryCondition],
806 ) -> Vec<&'a Node> {
807 let result = self.pool.parallel_query(|node| {
808 conditions.iter().all(|cond| cond.matches(node))
809 });
810
811 result
812 }
813
814 fn build_type_index(
815 &self,
816 node_type: &str,
817 ) -> Vec<&Node> {
818 self.pool.parallel_query(|node| node.r#type == node_type)
819 }
820
821 fn build_depth_index(
822 &self,
823 target_depth: usize,
824 ) -> Vec<&Node> {
825 self.pool.parallel_query(|node| {
826 self.pool
827 .get_node_depth(&node.id)
828 .map(|depth| depth == target_depth)
829 .unwrap_or(false)
830 })
831 }
832
833 fn build_mark_index(
834 &self,
835 mark_type: &str,
836 ) -> Vec<&Node> {
837 self.pool.parallel_query(|node| {
838 node.marks.iter().any(|mark| mark.r#type == mark_type)
839 })
840 }
841}
842
843#[derive(Debug, Clone)]
845pub enum QueryCondition {
846 ByType(String),
847 ByMark(String),
848 ByAttr { key: String, value: serde_json::Value },
849 IsLeaf,
850 HasChildren,
851}
852
853impl QueryCondition {
854 pub fn matches(
855 &self,
856 node: &Node,
857 ) -> bool {
858 match self {
859 QueryCondition::ByType(type_name) => node.r#type == *type_name,
860 QueryCondition::ByMark(mark_type) => {
861 node.marks.iter().any(|mark| mark.r#type == *mark_type)
862 },
863 QueryCondition::ByAttr { key, value } => {
864 node.attrs.get(key) == Some(value)
865 },
866 QueryCondition::IsLeaf => node.content.is_empty(),
867 QueryCondition::HasChildren => !node.content.is_empty(),
868 }
869 }
870
871 pub fn cache_key(&self) -> String {
872 match self {
873 QueryCondition::ByType(t) => format!("type_{t}"),
874 QueryCondition::ByMark(m) => format!("mark_{m}"),
875 QueryCondition::ByAttr { key, value } => {
876 format!(
877 "attr_{}_{}",
878 key,
879 serde_json::to_string(value).unwrap_or_default()
880 )
881 },
882 QueryCondition::IsLeaf => "is_leaf".to_string(),
883 QueryCondition::HasChildren => "has_children".to_string(),
884 }
885 }
886}
887
888#[derive(Debug)]
890pub struct CacheHitRates {
891 pub query_cache_size: usize,
892 pub type_index_cache_size: usize,
893 pub depth_index_cache_size: usize,
894 pub mark_index_cache_size: usize,
895}
896
897impl NodePool {
898 pub fn lazy_query(&self) -> LazyQueryEngine<'_> {
900 LazyQueryEngine::new(self)
902 }
903}