Skip to main content

haystack_core/graph/
entity_graph.rs

1// EntityGraph — in-memory entity store with bitmap indexing and ref adjacency.
2
3use std::collections::HashMap;
4
5use indexmap::IndexMap;
6use parking_lot::Mutex;
7use rayon::prelude::*;
8
9use crate::data::{HCol, HDict, HGrid};
10use crate::filter::{FilterNode, matches_with_ns, parse_filter};
11use crate::kinds::{HRef, Kind};
12use crate::ontology::{DefNamespace, ValidationIssue};
13
14use super::adjacency::RefAdjacency;
15use super::bitmap::TagBitmapIndex;
16use super::changelog::{DiffOp, GraphDiff};
17use super::columnar::ColumnarStore;
18use super::csr::CsrAdjacency;
19use super::query_planner;
20use super::value_index::ValueIndex;
21
22/// Errors returned by EntityGraph operations.
23#[derive(Debug, thiserror::Error)]
24pub enum GraphError {
25    #[error("entity missing 'id' tag")]
26    MissingId,
27    #[error("entity id must be a Ref")]
28    InvalidId,
29    #[error("entity already exists: {0}")]
30    DuplicateRef(String),
31    #[error("entity not found: {0}")]
32    NotFound(String),
33    #[error("filter error: {0}")]
34    Filter(String),
35}
36
37/// Core entity graph with bitmap tag indexing and bidirectional ref adjacency.
38pub struct EntityGraph {
39    /// ref_val -> entity dict
40    entities: HashMap<String, HDict>,
41    /// ref_val -> internal numeric id (for bitmap indexing)
42    id_map: HashMap<String, usize>,
43    /// internal numeric id -> ref_val
44    reverse_id: HashMap<usize, String>,
45    /// Next internal id to assign.
46    next_id: usize,
47    /// Tag bitmap index for fast has/missing queries.
48    tag_index: TagBitmapIndex,
49    /// Bidirectional ref adjacency for graph traversal.
50    adjacency: RefAdjacency,
51    /// Optional ontology namespace for spec-aware operations.
52    namespace: Option<DefNamespace>,
53    /// Monotonic version counter, incremented on every mutation.
54    version: u64,
55    /// Ordered list of mutations.
56    changelog: std::collections::VecDeque<GraphDiff>,
57    /// LRU query cache: (filter, version) → matching ref_vals.
58    /// Uses Mutex for interior mutability since read_all takes &self.
59    query_cache: Mutex<QueryCache>,
60    /// Parsed filter AST cache: filter_string → AST (version-independent).
61    ast_cache: Mutex<HashMap<String, FilterNode>>,
62    /// Optional B-Tree value indexes for comparison-based filter acceleration.
63    value_index: ValueIndex,
64    /// CSR snapshot of adjacency for cache-friendly traversal.
65    /// Rebuilt lazily via `rebuild_csr()`. `None` until first build.
66    csr: Option<CsrAdjacency>,
67    /// Version at which the CSR was last rebuilt (for staleness detection).
68    csr_version: u64,
69    /// Columnar storage for cache-friendly single-tag scans.
70    columnar: ColumnarStore,
71}
72
73/// Fixed-capacity LRU cache for filter query results using IndexMap for O(1) ops.
74struct QueryCache {
75    /// (filter, version) → matching ref_vals. Most-recently-used at the back.
76    entries: IndexMap<(String, u64), Vec<String>>,
77    capacity: usize,
78}
79
80impl QueryCache {
81    fn new(capacity: usize) -> Self {
82        Self {
83            entries: IndexMap::with_capacity(capacity),
84            capacity,
85        }
86    }
87
88    fn get(&mut self, filter: &str, version: u64) -> Option<&[String]> {
89        // Move to back (most recently used) on access.
90        let key = (filter.to_string(), version);
91        let idx = self.entries.get_index_of(&key)?;
92        self.entries.move_index(idx, self.entries.len() - 1);
93        self.entries.get(&key).map(|v| v.as_slice())
94    }
95
96    fn insert(&mut self, filter: String, version: u64, ref_vals: Vec<String>) {
97        if self.entries.len() >= self.capacity {
98            // Evict least recently used (front).
99            self.entries.shift_remove_index(0);
100        }
101        self.entries.insert((filter, version), ref_vals);
102    }
103}
104
105const MAX_CHANGELOG: usize = 10_000;
106const QUERY_CACHE_CAPACITY: usize = 256;
107
108/// Common Haystack fields to auto-index for O(log N) value lookups.
109const AUTO_INDEX_FIELDS: &[&str] = &[
110    "siteRef", "equipRef", "dis", "curVal", "area", "geoCity", "kind", "unit",
111];
112
113impl EntityGraph {
114    /// Create an empty entity graph with standard Haystack fields auto-indexed.
115    pub fn new() -> Self {
116        let mut value_index = ValueIndex::new();
117        for field in AUTO_INDEX_FIELDS {
118            value_index.index_field(field);
119        }
120        Self {
121            entities: HashMap::new(),
122            id_map: HashMap::new(),
123            reverse_id: HashMap::new(),
124            next_id: 0,
125            tag_index: TagBitmapIndex::new(),
126            adjacency: RefAdjacency::new(),
127            namespace: None,
128            version: 0,
129            changelog: std::collections::VecDeque::new(),
130            query_cache: Mutex::new(QueryCache::new(QUERY_CACHE_CAPACITY)),
131            ast_cache: Mutex::new(HashMap::new()),
132            value_index,
133            csr: None,
134            csr_version: 0,
135            columnar: ColumnarStore::new(),
136        }
137    }
138
139    /// Create an entity graph with an ontology namespace.
140    pub fn with_namespace(ns: DefNamespace) -> Self {
141        Self {
142            namespace: Some(ns),
143            ..Self::new()
144        }
145    }
146
147    // ── Value Indexes ──
148
149    /// Register a field for B-Tree value indexing. Enables O(log N) range
150    /// queries (e.g. `temp > 72`) for this field. Must be called before
151    /// entities are added, or followed by `rebuild_value_index` for existing data.
152    pub fn index_field(&mut self, field: &str) {
153        self.value_index.index_field(field);
154    }
155
156    /// Rebuild the value index for all indexed fields from the current entities.
157    pub fn rebuild_value_index(&mut self) {
158        self.value_index.clear();
159        for (ref_val, entity) in &self.entities {
160            if let Some(&eid) = self.id_map.get(ref_val.as_str()) {
161                for (name, val) in entity.iter() {
162                    if self.value_index.has_index(name) {
163                        self.value_index.add(eid, name, val);
164                    }
165                }
166            }
167        }
168    }
169
170    /// Returns a reference to the value index (for use by the query planner).
171    pub fn value_index(&self) -> &ValueIndex {
172        &self.value_index
173    }
174
175    // ── Columnar Storage ──
176
177    /// Register a tag for columnar storage. Enables cache-friendly sequential
178    /// scans for this tag. Must be called before entities are added, or followed
179    /// by `rebuild_columnar` for existing data.
180    pub fn track_column(&mut self, tag: &str) {
181        self.columnar.track_tag(tag);
182    }
183
184    /// Rebuild all tracked columnar data from current entities.
185    pub fn rebuild_columnar(&mut self) {
186        self.columnar.clear();
187        self.columnar.ensure_capacity(self.next_id);
188        for (ref_val, entity) in &self.entities {
189            if let Some(&eid) = self.id_map.get(ref_val.as_str()) {
190                for (name, val) in entity.iter() {
191                    if self.columnar.is_tracked(name) {
192                        self.columnar.set(eid, name, val);
193                    }
194                }
195            }
196        }
197    }
198
199    /// Returns a reference to the columnar store for direct column scans.
200    pub fn columnar(&self) -> &ColumnarStore {
201        &self.columnar
202    }
203
204    // ── CRUD ──
205
206    /// Add an entity to the graph.
207    ///
208    /// The entity must have an `id` tag that is a `Ref`. Returns the ref
209    /// value string on success.
210    pub fn add(&mut self, entity: HDict) -> Result<String, GraphError> {
211        let ref_val = extract_ref_val(&entity)?;
212
213        if self.entities.contains_key(&ref_val) {
214            return Err(GraphError::DuplicateRef(ref_val));
215        }
216
217        let eid = self.next_id;
218        self.next_id = self.next_id.checked_add(1).ok_or(GraphError::InvalidId)?;
219
220        self.id_map.insert(ref_val.clone(), eid);
221        self.reverse_id.insert(eid, ref_val.clone());
222
223        // Index before inserting (borrows entity immutably, self mutably).
224        self.index_tags(eid, &entity);
225        self.index_refs(eid, &entity);
226
227        // Clone for the changelog, then move the entity into the map.
228        let entity_for_log = entity.clone();
229        self.entities.insert(ref_val.clone(), entity);
230
231        self.version += 1;
232        self.csr = None; // Invalidate CSR snapshot on mutation.
233        self.push_changelog(GraphDiff {
234            version: self.version,
235            op: DiffOp::Add,
236            ref_val: ref_val.clone(),
237            old: None,
238            new: Some(entity_for_log),
239        });
240
241        Ok(ref_val)
242    }
243
244    /// Get a reference to an entity by ref value.
245    pub fn get(&self, ref_val: &str) -> Option<&HDict> {
246        self.entities.get(ref_val)
247    }
248
249    /// Update an existing entity by merging `changes` into it.
250    ///
251    /// Tags in `changes` overwrite existing tags; `Kind::Remove` tags are
252    /// deleted. The `id` tag cannot be changed.
253    pub fn update(&mut self, ref_val: &str, changes: HDict) -> Result<(), GraphError> {
254        let eid = *self
255            .id_map
256            .get(ref_val)
257            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
258
259        // Remove the old entity from the map (avoids an extra clone).
260        let mut old_entity = self
261            .entities
262            .remove(ref_val)
263            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
264
265        // Remove old indexing.
266        self.remove_indexing(eid, &old_entity);
267
268        // Snapshot old state for changelog, then merge in-place.
269        let old_snapshot = old_entity.clone();
270        old_entity.merge(&changes);
271
272        // Re-index before re-inserting (entity is a local value, no borrow conflict).
273        self.index_tags(eid, &old_entity);
274        self.index_refs(eid, &old_entity);
275
276        // Clone for the changelog, then move the updated entity into the map.
277        let updated_for_log = old_entity.clone();
278        self.entities.insert(ref_val.to_string(), old_entity);
279
280        self.version += 1;
281        self.csr = None; // Invalidate CSR snapshot on mutation.
282        self.push_changelog(GraphDiff {
283            version: self.version,
284            op: DiffOp::Update,
285            ref_val: ref_val.to_string(),
286            old: Some(old_snapshot),
287            new: Some(updated_for_log),
288        });
289
290        Ok(())
291    }
292
293    /// Remove an entity from the graph. Returns the removed entity.
294    pub fn remove(&mut self, ref_val: &str) -> Result<HDict, GraphError> {
295        let eid = self
296            .id_map
297            .remove(ref_val)
298            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
299
300        self.reverse_id.remove(&eid);
301
302        let entity = self
303            .entities
304            .remove(ref_val)
305            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
306
307        self.remove_indexing(eid, &entity);
308
309        self.version += 1;
310        self.csr = None; // Invalidate CSR snapshot on mutation.
311        self.push_changelog(GraphDiff {
312            version: self.version,
313            op: DiffOp::Remove,
314            ref_val: ref_val.to_string(),
315            old: Some(entity.clone()),
316            new: None,
317        });
318
319        Ok(entity)
320    }
321
322    // ── Query ──
323
324    /// Run a filter expression and return matching entities as a grid.
325    pub fn read(&self, filter_expr: &str, limit: usize) -> Result<HGrid, GraphError> {
326        let results = self.read_all(filter_expr, limit)?;
327
328        if results.is_empty() {
329            return Ok(HGrid::new());
330        }
331
332        // Collect all unique column names.
333        let mut col_set: Vec<String> = Vec::new();
334        let mut seen = std::collections::HashSet::new();
335        for entity in &results {
336            for name in entity.tag_names() {
337                if seen.insert(name.to_string()) {
338                    col_set.push(name.to_string());
339                }
340            }
341        }
342        col_set.sort();
343        let cols: Vec<HCol> = col_set.iter().map(|n| HCol::new(n.as_str())).collect();
344        let rows: Vec<HDict> = results.into_iter().cloned().collect();
345
346        Ok(HGrid::from_parts(HDict::new(), cols, rows))
347    }
348
349    /// Run a filter expression and return matching entities as references.
350    pub fn read_all(&self, filter_expr: &str, limit: usize) -> Result<Vec<&HDict>, GraphError> {
351        let effective_limit = if limit == 0 { usize::MAX } else { limit };
352
353        // Check query cache (version-keyed, so mutations auto-invalidate).
354        {
355            let mut cache = self.query_cache.lock();
356            if let Some(cached_refs) = cache.get(filter_expr, self.version) {
357                let mut results = Vec::new();
358                for rv in cached_refs {
359                    if results.len() >= effective_limit {
360                        break;
361                    }
362                    if let Some(entity) = self.entities.get(rv) {
363                        results.push(entity);
364                    }
365                }
366                return Ok(results);
367            }
368        }
369
370        // Use cached AST or parse and cache it (ASTs are version-independent).
371        let ast = {
372            let mut ast_cache = self.ast_cache.lock();
373            if let Some(cached) = ast_cache.get(filter_expr) {
374                cached.clone()
375            } else {
376                let parsed =
377                    parse_filter(filter_expr).map_err(|e| GraphError::Filter(e.to_string()))?;
378                ast_cache.insert(filter_expr.to_string(), parsed.clone());
379                parsed
380            }
381        };
382
383        // Phase 1: bitmap acceleration (with value index + adjacency).
384        let max_id = self.next_id;
385        let candidates = query_planner::bitmap_candidates_with_values(
386            &ast,
387            &self.tag_index,
388            &self.value_index,
389            &self.adjacency,
390            max_id,
391        );
392
393        // Phase 2: full filter evaluation.
394        let resolver = |r: &HRef| -> Option<&HDict> { self.entities.get(&r.val) };
395        let ns = self.namespace.as_ref();
396
397        // Use parallel evaluation only for large unlimited queries where rayon
398        // overhead is worthwhile. Bounded queries always use sequential + early exit.
399        const PARALLEL_THRESHOLD: usize = 500;
400        let use_parallel = effective_limit == usize::MAX;
401
402        let mut results: Vec<&HDict>;
403
404        if let Some(ref bitmap) = candidates {
405            let candidate_ids: Vec<usize> = TagBitmapIndex::iter_set_bits(bitmap).collect();
406
407            if candidate_ids.len() >= PARALLEL_THRESHOLD && use_parallel {
408                results = candidate_ids
409                    .par_iter()
410                    .filter_map(|&eid| {
411                        let ref_val = self.reverse_id.get(&eid)?;
412                        let entity = self.entities.get(ref_val)?;
413                        if matches_with_ns(&ast, entity, Some(&resolver), ns) {
414                            Some(entity)
415                        } else {
416                            None
417                        }
418                    })
419                    .collect();
420            } else {
421                results = Vec::new();
422                for eid in TagBitmapIndex::iter_set_bits(bitmap) {
423                    if results.len() >= effective_limit {
424                        break;
425                    }
426                    if let Some(ref_val) = self.reverse_id.get(&eid)
427                        && let Some(entity) = self.entities.get(ref_val)
428                        && matches_with_ns(&ast, entity, Some(&resolver), ns)
429                    {
430                        results.push(entity);
431                    }
432                }
433            }
434        } else {
435            let entity_count = self.entities.len();
436
437            if entity_count >= PARALLEL_THRESHOLD && use_parallel {
438                results = self
439                    .entities
440                    .par_iter()
441                    .filter_map(|(_, entity)| {
442                        if matches_with_ns(&ast, entity, Some(&resolver), ns) {
443                            Some(entity)
444                        } else {
445                            None
446                        }
447                    })
448                    .collect();
449            } else {
450                results = Vec::new();
451                for entity in self.entities.values() {
452                    if results.len() >= effective_limit {
453                        break;
454                    }
455                    if matches_with_ns(&ast, entity, Some(&resolver), ns) {
456                        results.push(entity);
457                    }
458                }
459            }
460        }
461
462        if results.len() > effective_limit {
463            results.truncate(effective_limit);
464        }
465
466        // Populate cache with result ref_vals (only for unlimited queries to
467        // avoid caching partial results that depend on limit).
468        if limit == 0 {
469            let ref_vals: Vec<String> = results
470                .iter()
471                .filter_map(|e| {
472                    e.get("id").and_then(|k| match k {
473                        Kind::Ref(r) => Some(r.val.clone()),
474                        _ => None,
475                    })
476                })
477                .collect();
478            let mut cache = self.query_cache.lock();
479            cache.insert(filter_expr.to_string(), self.version, ref_vals);
480        }
481
482        Ok(results)
483    }
484
485    // ── Ref traversal ──
486
487    /// Get ref values that the given entity points to.
488    pub fn refs_from(&self, ref_val: &str, ref_type: Option<&str>) -> Vec<String> {
489        match self.id_map.get(ref_val) {
490            Some(&eid) => {
491                if let Some(csr) = &self.csr {
492                    csr.targets_from(eid, ref_type)
493                } else {
494                    self.adjacency.targets_from(eid, ref_type)
495                }
496            }
497            None => Vec::new(),
498        }
499    }
500
501    /// Get ref values of entities that point to the given entity.
502    pub fn refs_to(&self, ref_val: &str, ref_type: Option<&str>) -> Vec<String> {
503        if let Some(csr) = &self.csr {
504            csr.sources_to(ref_val, ref_type)
505                .iter()
506                .filter_map(|eid| self.reverse_id.get(eid).cloned())
507                .collect()
508        } else {
509            self.adjacency
510                .sources_to(ref_val, ref_type)
511                .iter()
512                .filter_map(|eid| self.reverse_id.get(eid).cloned())
513                .collect()
514        }
515    }
516
517    /// Rebuild the CSR snapshot from the current adjacency.
518    /// Should be called after a batch of mutations (e.g., import, sync).
519    pub fn rebuild_csr(&mut self) {
520        let max_id = if self.next_id > 0 { self.next_id } else { 0 };
521        self.csr = Some(CsrAdjacency::from_ref_adjacency(&self.adjacency, max_id));
522        self.csr_version = self.version;
523    }
524
525    /// Returns true if the CSR snapshot is stale (version mismatch).
526    pub fn csr_is_stale(&self) -> bool {
527        match &self.csr {
528            Some(_) => self.csr_version != self.version,
529            None => true,
530        }
531    }
532
533    // ── Graph traversal ──
534
535    /// Return all edges in the graph as `(source_ref, ref_tag, target_ref)` tuples.
536    pub fn all_edges(&self) -> Vec<(String, String, String)> {
537        let mut edges = Vec::new();
538        for (&eid, ref_val) in &self.reverse_id {
539            if let Some(fwd) = self.adjacency.forward_raw().get(&eid) {
540                for (ref_tag, target) in fwd {
541                    edges.push((ref_val.clone(), ref_tag.clone(), target.clone()));
542                }
543            }
544        }
545        edges
546    }
547
548    /// BFS neighborhood: return entities and edges within `hops` of `ref_val`.
549    ///
550    /// `ref_types` optionally restricts which ref tags are traversed.
551    /// Returns `(entities, edges)` where edges are `(source, ref_tag, target)`.
552    pub fn neighbors(
553        &self,
554        ref_val: &str,
555        hops: usize,
556        ref_types: Option<&[&str]>,
557    ) -> (Vec<&HDict>, Vec<(String, String, String)>) {
558        use std::collections::{HashSet, VecDeque};
559
560        let start_eid = match self.id_map.get(ref_val) {
561            Some(&eid) => eid,
562            None => return (Vec::new(), Vec::new()),
563        };
564
565        let mut visited: HashSet<usize> = HashSet::new();
566        let mut queue: VecDeque<(usize, usize)> = VecDeque::new();
567        let mut result_entities: Vec<&HDict> = Vec::new();
568        let mut result_edges: Vec<(String, String, String)> = Vec::new();
569
570        visited.insert(start_eid);
571        queue.push_back((start_eid, 0));
572
573        if let Some(entity) = self.entities.get(ref_val) {
574            result_entities.push(entity);
575        }
576
577        while let Some((current_eid, depth)) = queue.pop_front() {
578            if depth >= hops {
579                continue;
580            }
581            let current_ref = match self.reverse_id.get(&current_eid) {
582                Some(s) => s.as_str(),
583                None => continue,
584            };
585
586            // Traverse forward edges
587            if let Some(fwd) = self.adjacency.forward_raw().get(&current_eid) {
588                for (ref_tag, target) in fwd {
589                    if let Some(types) = ref_types
590                        && !types.iter().any(|t| t == ref_tag)
591                    {
592                        continue;
593                    }
594                    result_edges.push((current_ref.to_string(), ref_tag.clone(), target.clone()));
595                    if let Some(&target_eid) = self.id_map.get(target.as_str())
596                        && visited.insert(target_eid)
597                    {
598                        if let Some(entity) = self.entities.get(target.as_str()) {
599                            result_entities.push(entity);
600                        }
601                        queue.push_back((target_eid, depth + 1));
602                    }
603                }
604            }
605            // Traverse reverse edges
606            if let Some(rev) = self.adjacency.reverse_raw().get(current_ref) {
607                for (ref_tag, source_eid) in rev {
608                    if let Some(types) = ref_types
609                        && !types.iter().any(|t| t == ref_tag)
610                    {
611                        continue;
612                    }
613                    if let Some(source_ref) = self.reverse_id.get(source_eid) {
614                        result_edges.push((
615                            source_ref.clone(),
616                            ref_tag.clone(),
617                            current_ref.to_string(),
618                        ));
619                        if visited.insert(*source_eid) {
620                            if let Some(entity) = self.entities.get(source_ref.as_str()) {
621                                result_entities.push(entity);
622                            }
623                            queue.push_back((*source_eid, depth + 1));
624                        }
625                    }
626                }
627            }
628        }
629
630        result_entities.sort_by(|a, b| {
631            let a_id = a.id().map(|r| r.val.as_str()).unwrap_or("");
632            let b_id = b.id().map(|r| r.val.as_str()).unwrap_or("");
633            a_id.cmp(b_id)
634        });
635        result_edges.sort();
636        // Deduplicate edges (reverse traversal can produce duplicates)
637        result_edges.dedup();
638
639        (result_entities, result_edges)
640    }
641
642    /// BFS shortest path from `from` to `to`. Returns ordered ref_vals, or
643    /// empty vec if no path exists.
644    pub fn shortest_path(&self, from: &str, to: &str) -> Vec<String> {
645        use std::collections::{HashMap as StdHashMap, VecDeque};
646
647        if from == to {
648            return vec![from.to_string()];
649        }
650        let from_eid = match self.id_map.get(from) {
651            Some(&eid) => eid,
652            None => return Vec::new(),
653        };
654        let to_eid = match self.id_map.get(to) {
655            Some(&eid) => eid,
656            None => return Vec::new(),
657        };
658
659        // parent map: child_eid -> parent_eid (usize::MAX = root sentinel)
660        let mut parents: StdHashMap<usize, usize> = StdHashMap::new();
661        let mut queue: VecDeque<usize> = VecDeque::new();
662        parents.insert(from_eid, usize::MAX);
663        queue.push_back(from_eid);
664
665        while let Some(current_eid) = queue.pop_front() {
666            let current_ref = match self.reverse_id.get(&current_eid) {
667                Some(s) => s.as_str(),
668                None => continue,
669            };
670
671            // Forward edges
672            if let Some(fwd) = self.adjacency.forward_raw().get(&current_eid) {
673                for (_, target) in fwd {
674                    if let Some(&target_eid) = self.id_map.get(target.as_str())
675                        && let std::collections::hash_map::Entry::Vacant(e) =
676                            parents.entry(target_eid)
677                    {
678                        e.insert(current_eid);
679                        if target_eid == to_eid {
680                            return Self::reconstruct_path_usize(
681                                &parents,
682                                to_eid,
683                                &self.reverse_id,
684                            );
685                        }
686                        queue.push_back(target_eid);
687                    }
688                }
689            }
690            // Reverse edges
691            if let Some(rev) = self.adjacency.reverse_raw().get(current_ref) {
692                for (_, source_eid) in rev {
693                    if !parents.contains_key(source_eid) {
694                        parents.insert(*source_eid, current_eid);
695                        if *source_eid == to_eid {
696                            return Self::reconstruct_path_usize(
697                                &parents,
698                                to_eid,
699                                &self.reverse_id,
700                            );
701                        }
702                        queue.push_back(*source_eid);
703                    }
704                }
705            }
706        }
707
708        Vec::new() // No path found
709    }
710
711    /// Reconstruct path from usize-based BFS parent map.
712    fn reconstruct_path_usize(
713        parents: &std::collections::HashMap<usize, usize>,
714        to_eid: usize,
715        reverse_id: &HashMap<usize, String>,
716    ) -> Vec<String> {
717        let mut path_eids = vec![to_eid];
718        let mut current = to_eid;
719        while let Some(&parent) = parents.get(&current) {
720            if parent == usize::MAX {
721                break;
722            }
723            path_eids.push(parent);
724            current = parent;
725        }
726        path_eids.reverse();
727        path_eids
728            .iter()
729            .filter_map(|eid| reverse_id.get(eid).cloned())
730            .collect()
731    }
732
733    /// Return the subtree rooted at `root` up to `max_depth` levels.
734    ///
735    /// Walks reverse refs (children referencing parent). Returns entities
736    /// paired with their depth from root. Root is depth 0.
737    pub fn subtree(&self, root: &str, max_depth: usize) -> Vec<(&HDict, usize)> {
738        use std::collections::{HashSet, VecDeque};
739
740        let mut visited: HashSet<String> = HashSet::new();
741        let mut queue: VecDeque<(String, usize)> = VecDeque::new();
742        let mut results: Vec<(&HDict, usize)> = Vec::new();
743
744        visited.insert(root.to_string());
745        queue.push_back((root.to_string(), 0));
746
747        if let Some(entity) = self.entities.get(root) {
748            results.push((entity, 0));
749        } else {
750            return Vec::new();
751        }
752
753        while let Some((current, depth)) = queue.pop_front() {
754            if depth >= max_depth {
755                continue;
756            }
757            // Children = entities that reference current (reverse refs)
758            let child_refs = self.refs_to(&current, None);
759            for child_ref in child_refs {
760                if visited.insert(child_ref.clone())
761                    && let Some(entity) = self.entities.get(&child_ref)
762                {
763                    results.push((entity, depth + 1));
764                    queue.push_back((child_ref, depth + 1));
765                }
766            }
767        }
768
769        results
770    }
771
772    // ── Spec-aware ──
773
774    /// Find all entities that structurally fit a spec/type name.
775    ///
776    /// Requires a namespace to be set. Returns empty if no namespace.
777    pub fn entities_fitting(&self, spec_name: &str) -> Vec<&HDict> {
778        match &self.namespace {
779            Some(ns) => self
780                .entities
781                .values()
782                .filter(|e| ns.fits(e, spec_name))
783                .collect(),
784            None => Vec::new(),
785        }
786    }
787
788    /// Validate all entities against the namespace and check for dangling refs.
789    ///
790    /// Returns empty if no namespace is set and no dangling refs exist.
791    pub fn validate(&self) -> Vec<ValidationIssue> {
792        let mut issues: Vec<ValidationIssue> = match &self.namespace {
793            Some(ns) => self
794                .entities
795                .values()
796                .flat_map(|e| ns.validate_entity(e))
797                .collect(),
798            None => Vec::new(),
799        };
800
801        // Check for dangling refs: Ref values (except `id`) that point to
802        // entities not present in the graph.
803        for entity in self.entities.values() {
804            let entity_ref = entity.id().map(|r| r.val.as_str());
805            for (name, val) in entity.iter() {
806                if name == "id" {
807                    continue;
808                }
809                if let Kind::Ref(r) = val
810                    && !self.entities.contains_key(&r.val)
811                {
812                    issues.push(ValidationIssue {
813                        entity: entity_ref.map(|s| s.to_string()),
814                        issue_type: "dangling_ref".to_string(),
815                        detail: format!(
816                            "tag '{}' references '{}' which does not exist in the graph",
817                            name, r.val
818                        ),
819                    });
820                }
821            }
822        }
823
824        issues
825    }
826
827    // ── Serialization ──
828
829    /// Convert matching entities to a grid.
830    ///
831    /// If `filter_expr` is empty, exports all entities.
832    /// Otherwise, delegates to `read`.
833    pub fn to_grid(&self, filter_expr: &str) -> Result<HGrid, GraphError> {
834        if filter_expr.is_empty() {
835            let entities: Vec<&HDict> = self.entities.values().collect();
836            return Ok(Self::entities_to_grid(&entities));
837        }
838        self.read(filter_expr, 0)
839    }
840
841    /// Build a grid from a slice of entity references.
842    fn entities_to_grid(entities: &[&HDict]) -> HGrid {
843        if entities.is_empty() {
844            return HGrid::new();
845        }
846
847        let mut col_set: Vec<String> = Vec::new();
848        let mut seen = std::collections::HashSet::new();
849        for entity in entities {
850            for name in entity.tag_names() {
851                if seen.insert(name.to_string()) {
852                    col_set.push(name.to_string());
853                }
854            }
855        }
856        col_set.sort();
857        let cols: Vec<HCol> = col_set.iter().map(|n| HCol::new(n.as_str())).collect();
858        let rows: Vec<HDict> = entities.iter().map(|e| (*e).clone()).collect();
859
860        HGrid::from_parts(HDict::new(), cols, rows)
861    }
862
863    /// Build an EntityGraph from a grid.
864    ///
865    /// Rows without a valid `id` Ref tag are silently skipped.
866    pub fn from_grid(grid: &HGrid, namespace: Option<DefNamespace>) -> Result<Self, GraphError> {
867        let mut graph = match namespace {
868            Some(ns) => Self::with_namespace(ns),
869            None => Self::new(),
870        };
871        for row in &grid.rows {
872            if row.id().is_some() {
873                graph.add(row.clone())?;
874            }
875        }
876        // Bulk import: build CSR once after all adds.
877        graph.rebuild_csr();
878        Ok(graph)
879    }
880
881    // ── Change tracking ──
882
883    /// Get changelog entries since a given version.
884    pub fn changes_since(&self, version: u64) -> Vec<&GraphDiff> {
885        let target = version + 1;
886        // VecDeque may not be contiguous, so collect matching entries.
887        self.changelog
888            .iter()
889            .filter(|d| d.version >= target)
890            .collect()
891    }
892
893    /// Current graph version (monotonically increasing).
894    pub fn version(&self) -> u64 {
895        self.version
896    }
897
898    // ── Container ──
899
900    /// Number of entities in the graph.
901    pub fn len(&self) -> usize {
902        self.entities.len()
903    }
904
905    /// Returns `true` if the graph has no entities.
906    pub fn is_empty(&self) -> bool {
907        self.entities.is_empty()
908    }
909
910    /// Returns `true` if an entity with the given ref value exists.
911    pub fn contains(&self, ref_val: &str) -> bool {
912        self.entities.contains_key(ref_val)
913    }
914
915    /// Returns references to all entities in the graph.
916    pub fn all(&self) -> Vec<&HDict> {
917        self.entities.values().collect()
918    }
919
920    // ── Internal indexing ──
921
922    /// Add tag bitmap entries for an entity.
923    fn index_tags(&mut self, entity_id: usize, entity: &HDict) {
924        let tags: Vec<String> = entity.tag_names().map(|s| s.to_string()).collect();
925        self.tag_index.add(entity_id, &tags);
926
927        // Update value indexes for any indexed fields present on this entity.
928        for (name, val) in entity.iter() {
929            if self.value_index.has_index(name) {
930                self.value_index.add(entity_id, name, val);
931            }
932            // Update columnar store for tracked tags.
933            if self.columnar.is_tracked(name) {
934                self.columnar.set(entity_id, name, val);
935            }
936        }
937    }
938
939    /// Add ref adjacency entries for an entity.
940    fn index_refs(&mut self, entity_id: usize, entity: &HDict) {
941        for (name, val) in entity.iter() {
942            if let Kind::Ref(r) = val {
943                // Skip the "id" tag — it is the entity's own identity,
944                // not a reference edge.
945                if name != "id" {
946                    self.adjacency.add(entity_id, name, &r.val);
947                }
948            }
949        }
950    }
951
952    /// Remove all index entries for an entity.
953    fn remove_indexing(&mut self, entity_id: usize, entity: &HDict) {
954        let tags: Vec<String> = entity.tag_names().map(|s| s.to_string()).collect();
955        self.tag_index.remove(entity_id, &tags);
956        self.adjacency.remove(entity_id);
957
958        // Remove from value indexes.
959        for (name, val) in entity.iter() {
960            if self.value_index.has_index(name) {
961                self.value_index.remove(entity_id, name, val);
962            }
963        }
964
965        // Clear columnar data for this entity.
966        self.columnar.clear_entity(entity_id);
967    }
968
969    /// Append a diff to the changelog, capping it at [`MAX_CHANGELOG`] entries.
970    fn push_changelog(&mut self, diff: GraphDiff) {
971        self.changelog.push_back(diff);
972        while self.changelog.len() > MAX_CHANGELOG {
973            self.changelog.pop_front();
974        }
975    }
976}
977
978impl Default for EntityGraph {
979    fn default() -> Self {
980        Self::new()
981    }
982}
983
984/// Extract the ref value string from an entity's `id` tag.
985fn extract_ref_val(entity: &HDict) -> Result<String, GraphError> {
986    match entity.get("id") {
987        Some(Kind::Ref(r)) => Ok(r.val.clone()),
988        Some(_) => Err(GraphError::InvalidId),
989        None => Err(GraphError::MissingId),
990    }
991}
992
993#[cfg(test)]
994mod tests {
995    use super::*;
996    use crate::kinds::Number;
997
998    fn make_site(id: &str) -> HDict {
999        let mut d = HDict::new();
1000        d.set("id", Kind::Ref(HRef::from_val(id)));
1001        d.set("site", Kind::Marker);
1002        d.set("dis", Kind::Str(format!("Site {id}")));
1003        d.set(
1004            "area",
1005            Kind::Number(Number::new(4500.0, Some("ft\u{00b2}".into()))),
1006        );
1007        d
1008    }
1009
1010    fn make_equip(id: &str, site_ref: &str) -> HDict {
1011        let mut d = HDict::new();
1012        d.set("id", Kind::Ref(HRef::from_val(id)));
1013        d.set("equip", Kind::Marker);
1014        d.set("dis", Kind::Str(format!("Equip {id}")));
1015        d.set("siteRef", Kind::Ref(HRef::from_val(site_ref)));
1016        d
1017    }
1018
1019    fn make_point(id: &str, equip_ref: &str) -> HDict {
1020        let mut d = HDict::new();
1021        d.set("id", Kind::Ref(HRef::from_val(id)));
1022        d.set("point", Kind::Marker);
1023        d.set("sensor", Kind::Marker);
1024        d.set("temp", Kind::Marker);
1025        d.set("dis", Kind::Str(format!("Point {id}")));
1026        d.set("equipRef", Kind::Ref(HRef::from_val(equip_ref)));
1027        d.set(
1028            "curVal",
1029            Kind::Number(Number::new(72.5, Some("\u{00b0}F".into()))),
1030        );
1031        d
1032    }
1033
1034    // ── Add tests ──
1035
1036    #[test]
1037    fn add_entity_with_valid_id() {
1038        let mut g = EntityGraph::new();
1039        let result = g.add(make_site("site-1"));
1040        assert!(result.is_ok());
1041        assert_eq!(result.unwrap(), "site-1");
1042        assert_eq!(g.len(), 1);
1043    }
1044
1045    #[test]
1046    fn add_entity_missing_id_fails() {
1047        let mut g = EntityGraph::new();
1048        let entity = HDict::new();
1049        let err = g.add(entity).unwrap_err();
1050        assert!(matches!(err, GraphError::MissingId));
1051    }
1052
1053    #[test]
1054    fn add_entity_non_ref_id_fails() {
1055        let mut g = EntityGraph::new();
1056        let mut entity = HDict::new();
1057        entity.set("id", Kind::Str("not-a-ref".into()));
1058        let err = g.add(entity).unwrap_err();
1059        assert!(matches!(err, GraphError::InvalidId));
1060    }
1061
1062    #[test]
1063    fn add_duplicate_ref_fails() {
1064        let mut g = EntityGraph::new();
1065        g.add(make_site("site-1")).unwrap();
1066        let err = g.add(make_site("site-1")).unwrap_err();
1067        assert!(matches!(err, GraphError::DuplicateRef(_)));
1068    }
1069
1070    // ── Get tests ──
1071
1072    #[test]
1073    fn get_existing_entity() {
1074        let mut g = EntityGraph::new();
1075        g.add(make_site("site-1")).unwrap();
1076        let entity = g.get("site-1").unwrap();
1077        assert!(entity.has("site"));
1078        assert_eq!(entity.get("dis"), Some(&Kind::Str("Site site-1".into())));
1079    }
1080
1081    #[test]
1082    fn get_missing_entity_returns_none() {
1083        let g = EntityGraph::new();
1084        assert!(g.get("nonexistent").is_none());
1085    }
1086
1087    // ── Update tests ──
1088
1089    #[test]
1090    fn update_merges_changes() {
1091        let mut g = EntityGraph::new();
1092        g.add(make_site("site-1")).unwrap();
1093
1094        let mut changes = HDict::new();
1095        changes.set("dis", Kind::Str("Updated Site".into()));
1096        changes.set("geoCity", Kind::Str("Richmond".into()));
1097        g.update("site-1", changes).unwrap();
1098
1099        let entity = g.get("site-1").unwrap();
1100        assert_eq!(entity.get("dis"), Some(&Kind::Str("Updated Site".into())));
1101        assert_eq!(entity.get("geoCity"), Some(&Kind::Str("Richmond".into())));
1102        assert!(entity.has("site")); // unchanged
1103    }
1104
1105    #[test]
1106    fn update_missing_entity_fails() {
1107        let mut g = EntityGraph::new();
1108        let err = g.update("nonexistent", HDict::new()).unwrap_err();
1109        assert!(matches!(err, GraphError::NotFound(_)));
1110    }
1111
1112    // ── Remove tests ──
1113
1114    #[test]
1115    fn remove_entity() {
1116        let mut g = EntityGraph::new();
1117        g.add(make_site("site-1")).unwrap();
1118        let removed = g.remove("site-1").unwrap();
1119        assert!(removed.has("site"));
1120        assert!(g.get("site-1").is_none());
1121        assert_eq!(g.len(), 0);
1122    }
1123
1124    #[test]
1125    fn remove_missing_entity_fails() {
1126        let mut g = EntityGraph::new();
1127        let err = g.remove("nonexistent").unwrap_err();
1128        assert!(matches!(err, GraphError::NotFound(_)));
1129    }
1130
1131    // ── Version / changelog tests ──
1132
1133    #[test]
1134    fn version_increments_on_mutations() {
1135        let mut g = EntityGraph::new();
1136        assert_eq!(g.version(), 0);
1137
1138        g.add(make_site("site-1")).unwrap();
1139        assert_eq!(g.version(), 1);
1140
1141        g.update("site-1", HDict::new()).unwrap();
1142        assert_eq!(g.version(), 2);
1143
1144        g.remove("site-1").unwrap();
1145        assert_eq!(g.version(), 3);
1146    }
1147
1148    #[test]
1149    fn changelog_records_add_update_remove() {
1150        let mut g = EntityGraph::new();
1151        g.add(make_site("site-1")).unwrap();
1152        g.update("site-1", HDict::new()).unwrap();
1153        g.remove("site-1").unwrap();
1154
1155        let changes = g.changes_since(0);
1156        assert_eq!(changes.len(), 3);
1157        assert_eq!(changes[0].op, DiffOp::Add);
1158        assert_eq!(changes[0].ref_val, "site-1");
1159        assert!(changes[0].old.is_none());
1160        assert!(changes[0].new.is_some());
1161
1162        assert_eq!(changes[1].op, DiffOp::Update);
1163        assert!(changes[1].old.is_some());
1164        assert!(changes[1].new.is_some());
1165
1166        assert_eq!(changes[2].op, DiffOp::Remove);
1167        assert!(changes[2].old.is_some());
1168        assert!(changes[2].new.is_none());
1169    }
1170
1171    #[test]
1172    fn changes_since_returns_subset() {
1173        let mut g = EntityGraph::new();
1174        g.add(make_site("site-1")).unwrap(); // v1
1175        g.add(make_site("site-2")).unwrap(); // v2
1176        g.add(make_site("site-3")).unwrap(); // v3
1177
1178        let since_v2 = g.changes_since(2);
1179        assert_eq!(since_v2.len(), 1);
1180        assert_eq!(since_v2[0].ref_val, "site-3");
1181    }
1182
1183    // ── Container tests ──
1184
1185    #[test]
1186    fn contains_check() {
1187        let mut g = EntityGraph::new();
1188        g.add(make_site("site-1")).unwrap();
1189        assert!(g.contains("site-1"));
1190        assert!(!g.contains("site-2"));
1191    }
1192
1193    #[test]
1194    fn len_and_is_empty() {
1195        let mut g = EntityGraph::new();
1196        assert!(g.is_empty());
1197        assert_eq!(g.len(), 0);
1198
1199        g.add(make_site("site-1")).unwrap();
1200        assert!(!g.is_empty());
1201        assert_eq!(g.len(), 1);
1202    }
1203
1204    // ── Query tests ──
1205
1206    #[test]
1207    fn read_with_simple_has_filter() {
1208        let mut g = EntityGraph::new();
1209        g.add(make_site("site-1")).unwrap();
1210        g.add(make_equip("equip-1", "site-1")).unwrap();
1211
1212        let results = g.read_all("site", 0).unwrap();
1213        assert_eq!(results.len(), 1);
1214        assert!(results[0].has("site"));
1215    }
1216
1217    #[test]
1218    fn read_with_comparison_filter() {
1219        let mut g = EntityGraph::new();
1220        g.add(make_point("pt-1", "equip-1")).unwrap();
1221
1222        let results = g.read_all("curVal > 70\u{00b0}F", 0).unwrap();
1223        assert_eq!(results.len(), 1);
1224    }
1225
1226    #[test]
1227    fn read_with_and_filter() {
1228        let mut g = EntityGraph::new();
1229        g.add(make_point("pt-1", "equip-1")).unwrap();
1230        g.add(make_equip("equip-1", "site-1")).unwrap();
1231
1232        let results = g.read_all("point and sensor", 0).unwrap();
1233        assert_eq!(results.len(), 1);
1234    }
1235
1236    #[test]
1237    fn read_with_or_filter() {
1238        let mut g = EntityGraph::new();
1239        g.add(make_site("site-1")).unwrap();
1240        g.add(make_equip("equip-1", "site-1")).unwrap();
1241
1242        let results = g.read_all("site or equip", 0).unwrap();
1243        assert_eq!(results.len(), 2);
1244    }
1245
1246    #[test]
1247    fn read_limit_parameter_works() {
1248        let mut g = EntityGraph::new();
1249        g.add(make_site("site-1")).unwrap();
1250        g.add(make_site("site-2")).unwrap();
1251        g.add(make_site("site-3")).unwrap();
1252
1253        let results = g.read_all("site", 2).unwrap();
1254        assert_eq!(results.len(), 2);
1255    }
1256
1257    #[test]
1258    fn read_returns_grid() {
1259        let mut g = EntityGraph::new();
1260        g.add(make_site("site-1")).unwrap();
1261        g.add(make_site("site-2")).unwrap();
1262
1263        let grid = g.read("site", 0).unwrap();
1264        assert_eq!(grid.len(), 2);
1265        assert!(grid.col("site").is_some());
1266        assert!(grid.col("id").is_some());
1267    }
1268
1269    #[test]
1270    fn read_invalid_filter() {
1271        let g = EntityGraph::new();
1272        let err = g.read("!!!", 0).unwrap_err();
1273        assert!(matches!(err, GraphError::Filter(_)));
1274    }
1275
1276    #[test]
1277    fn query_cache_returns_same_results() {
1278        let mut g = EntityGraph::new();
1279        g.add(make_site("site-1")).unwrap();
1280        g.add(make_equip("equip-1", "site-1")).unwrap();
1281        g.add(make_point("pt-1", "equip-1")).unwrap();
1282
1283        // First call populates cache
1284        let results1 = g.read_all("site", 0).unwrap();
1285        assert_eq!(results1.len(), 1);
1286
1287        // Second call should hit cache and return same results
1288        let results2 = g.read_all("site", 0).unwrap();
1289        assert_eq!(results2.len(), 1);
1290        assert_eq!(results1[0].get("id"), results2[0].get("id"));
1291    }
1292
1293    #[test]
1294    fn query_cache_invalidated_by_mutation() {
1295        let mut g = EntityGraph::new();
1296        g.add(make_site("site-1")).unwrap();
1297
1298        let results = g.read_all("site", 0).unwrap();
1299        assert_eq!(results.len(), 1);
1300
1301        // Add another site — cache should be invalidated by version bump
1302        g.add(make_site("site-2")).unwrap();
1303
1304        let results = g.read_all("site", 0).unwrap();
1305        assert_eq!(results.len(), 2);
1306    }
1307
1308    // ── Ref traversal tests ──
1309
1310    #[test]
1311    fn refs_from_returns_targets() {
1312        let mut g = EntityGraph::new();
1313        g.add(make_site("site-1")).unwrap();
1314        g.add(make_equip("equip-1", "site-1")).unwrap();
1315
1316        let targets = g.refs_from("equip-1", None);
1317        assert_eq!(targets, vec!["site-1".to_string()]);
1318    }
1319
1320    #[test]
1321    fn refs_to_returns_sources() {
1322        let mut g = EntityGraph::new();
1323        g.add(make_site("site-1")).unwrap();
1324        g.add(make_equip("equip-1", "site-1")).unwrap();
1325        g.add(make_equip("equip-2", "site-1")).unwrap();
1326
1327        let mut sources = g.refs_to("site-1", None);
1328        sources.sort();
1329        assert_eq!(sources.len(), 2);
1330    }
1331
1332    #[test]
1333    fn type_filtered_ref_queries() {
1334        let mut g = EntityGraph::new();
1335        g.add(make_site("site-1")).unwrap();
1336        g.add(make_equip("equip-1", "site-1")).unwrap();
1337
1338        let targets = g.refs_from("equip-1", Some("siteRef"));
1339        assert_eq!(targets, vec!["site-1".to_string()]);
1340
1341        let targets = g.refs_from("equip-1", Some("equipRef"));
1342        assert!(targets.is_empty());
1343    }
1344
1345    #[test]
1346    fn refs_from_nonexistent_entity() {
1347        let g = EntityGraph::new();
1348        assert!(g.refs_from("nonexistent", None).is_empty());
1349    }
1350
1351    #[test]
1352    fn refs_to_nonexistent_entity() {
1353        let g = EntityGraph::new();
1354        assert!(g.refs_to("nonexistent", None).is_empty());
1355    }
1356
1357    // ── Serialization tests ──
1358
1359    #[test]
1360    fn from_grid_round_trip() {
1361        let mut g = EntityGraph::new();
1362        g.add(make_site("site-1")).unwrap();
1363        g.add(make_equip("equip-1", "site-1")).unwrap();
1364
1365        let grid = g.to_grid("site or equip").unwrap();
1366        assert_eq!(grid.len(), 2);
1367
1368        let g2 = EntityGraph::from_grid(&grid, None).unwrap();
1369        assert_eq!(g2.len(), 2);
1370        assert!(g2.contains("site-1"));
1371        assert!(g2.contains("equip-1"));
1372    }
1373
1374    #[test]
1375    fn to_grid_empty_result() {
1376        let g = EntityGraph::new();
1377        let grid = g.to_grid("site").unwrap();
1378        assert!(grid.is_empty());
1379    }
1380
1381    // ── Update re-indexes correctly ──
1382
1383    #[test]
1384    fn update_reindexes_tags() {
1385        let mut g = EntityGraph::new();
1386        g.add(make_site("site-1")).unwrap();
1387
1388        // Should find the site with "site" filter.
1389        assert_eq!(g.read_all("site", 0).unwrap().len(), 1);
1390
1391        // Remove the "site" marker via update.
1392        let mut changes = HDict::new();
1393        changes.set("site", Kind::Remove);
1394        g.update("site-1", changes).unwrap();
1395
1396        // Should no longer match "site" filter.
1397        assert_eq!(g.read_all("site", 0).unwrap().len(), 0);
1398    }
1399
1400    #[test]
1401    fn update_reindexes_refs() {
1402        let mut g = EntityGraph::new();
1403        g.add(make_site("site-1")).unwrap();
1404        g.add(make_site("site-2")).unwrap();
1405        g.add(make_equip("equip-1", "site-1")).unwrap();
1406
1407        // Initially equip-1 points to site-1.
1408        assert_eq!(g.refs_from("equip-1", None), vec!["site-1".to_string()]);
1409
1410        // Move equip-1 to site-2.
1411        let mut changes = HDict::new();
1412        changes.set("siteRef", Kind::Ref(HRef::from_val("site-2")));
1413        g.update("equip-1", changes).unwrap();
1414
1415        assert_eq!(g.refs_from("equip-1", None), vec!["site-2".to_string()]);
1416        assert!(g.refs_to("site-1", None).is_empty());
1417    }
1418
1419    // ── Dangling ref validation ──
1420
1421    #[test]
1422    fn validate_detects_dangling_refs() {
1423        let mut g = EntityGraph::new();
1424        g.add(make_site("site-1")).unwrap();
1425        // equip-1 has siteRef pointing to "site-1" (exists) — no issue
1426        g.add(make_equip("equip-1", "site-1")).unwrap();
1427        // equip-2 has siteRef pointing to "site-999" (does not exist) — dangling
1428        g.add(make_equip("equip-2", "site-999")).unwrap();
1429
1430        let issues = g.validate();
1431        assert!(!issues.is_empty());
1432
1433        let dangling: Vec<_> = issues
1434            .iter()
1435            .filter(|i| i.issue_type == "dangling_ref")
1436            .collect();
1437        assert_eq!(dangling.len(), 1);
1438        assert_eq!(dangling[0].entity.as_deref(), Some("equip-2"));
1439        assert!(dangling[0].detail.contains("site-999"));
1440        assert!(dangling[0].detail.contains("siteRef"));
1441    }
1442
1443    // ── Empty filter exports all ──
1444
1445    #[test]
1446    fn to_grid_empty_filter_exports_all() {
1447        let mut g = EntityGraph::new();
1448        g.add(make_site("site-1")).unwrap();
1449        g.add(make_equip("equip-1", "site-1")).unwrap();
1450        g.add(make_point("pt-1", "equip-1")).unwrap();
1451
1452        let grid = g.to_grid("").unwrap();
1453        assert_eq!(grid.len(), 3);
1454        assert!(grid.col("id").is_some());
1455    }
1456
1457    // ── from_grid skips rows without id ──
1458
1459    #[test]
1460    fn changelog_bounded_to_max_size() {
1461        let mut graph = EntityGraph::new();
1462        // Add more entities than MAX_CHANGELOG
1463        for i in 0..12_000 {
1464            let mut d = HDict::new();
1465            d.set("id", Kind::Ref(HRef::from_val(format!("e{i}"))));
1466            d.set("dis", Kind::Str(format!("Entity {i}")));
1467            graph.add(d).unwrap();
1468        }
1469        // Changelog should be capped
1470        assert!(graph.changes_since(0).len() <= 10_000);
1471        // Latest changes should still be present
1472        assert!(graph.changes_since(11_999).len() <= 1);
1473    }
1474
1475    #[test]
1476    fn from_grid_skips_rows_without_id() {
1477        let cols = vec![HCol::new("id"), HCol::new("dis"), HCol::new("site")];
1478
1479        let mut row_with_id = HDict::new();
1480        row_with_id.set("id", Kind::Ref(HRef::from_val("site-1")));
1481        row_with_id.set("site", Kind::Marker);
1482        row_with_id.set("dis", Kind::Str("Has ID".into()));
1483
1484        // Row with string id (not a Ref) — should be skipped.
1485        let mut row_bad_id = HDict::new();
1486        row_bad_id.set("id", Kind::Str("not-a-ref".into()));
1487        row_bad_id.set("dis", Kind::Str("Bad ID".into()));
1488
1489        // Row with no id at all — should be skipped.
1490        let mut row_no_id = HDict::new();
1491        row_no_id.set("dis", Kind::Str("No ID".into()));
1492
1493        let grid = HGrid::from_parts(HDict::new(), cols, vec![row_with_id, row_bad_id, row_no_id]);
1494        let g = EntityGraph::from_grid(&grid, None).unwrap();
1495
1496        assert_eq!(g.len(), 1);
1497        assert!(g.contains("site-1"));
1498    }
1499
1500    // ── Graph traversal method tests ──
1501
1502    fn build_hierarchy_graph() -> EntityGraph {
1503        let mut g = EntityGraph::new();
1504        g.add(make_site("s1")).unwrap();
1505        g.add(make_site("s2")).unwrap();
1506        g.add(make_equip("e1", "s1")).unwrap();
1507        g.add(make_equip("e2", "s1")).unwrap();
1508        g.add(make_equip("e3", "s2")).unwrap();
1509        g.add(make_point("p1", "e1")).unwrap();
1510        g.add(make_point("p2", "e1")).unwrap();
1511        g.add(make_point("p3", "e2")).unwrap();
1512        g
1513    }
1514
1515    #[test]
1516    fn all_edges_returns_all_ref_relationships() {
1517        let g = build_hierarchy_graph();
1518        let edges = g.all_edges();
1519        // e1->s1 (siteRef), e2->s1, e3->s2, p1->e1 (equipRef), p2->e1, p3->e2
1520        assert_eq!(edges.len(), 6);
1521        assert!(
1522            edges
1523                .iter()
1524                .any(|(s, t, d)| s == "e1" && t == "siteRef" && d == "s1")
1525        );
1526        assert!(
1527            edges
1528                .iter()
1529                .any(|(s, t, d)| s == "p1" && t == "equipRef" && d == "e1")
1530        );
1531    }
1532
1533    #[test]
1534    fn all_edges_empty_graph() {
1535        let g = EntityGraph::new();
1536        assert!(g.all_edges().is_empty());
1537    }
1538
1539    #[test]
1540    fn neighbors_one_hop() {
1541        let g = build_hierarchy_graph();
1542        let (entities, edges) = g.neighbors("e1", 1, None);
1543        // e1 + s1 (forward via siteRef) + p1, p2 (reverse from equipRef)
1544        let ids: Vec<String> = entities
1545            .iter()
1546            .filter_map(|e| e.id().map(|r| r.val.clone()))
1547            .collect();
1548        assert!(ids.contains(&"e1".to_string()));
1549        assert!(ids.contains(&"s1".to_string()));
1550        assert!(ids.contains(&"p1".to_string()));
1551        assert!(ids.contains(&"p2".to_string()));
1552        assert!(!edges.is_empty());
1553    }
1554
1555    #[test]
1556    fn neighbors_with_ref_type_filter() {
1557        let g = build_hierarchy_graph();
1558        let (entities, edges) = g.neighbors("e1", 1, Some(&["siteRef"]));
1559        // Only forward siteRef edge: e1->s1
1560        let ids: Vec<String> = entities
1561            .iter()
1562            .filter_map(|e| e.id().map(|r| r.val.clone()))
1563            .collect();
1564        assert!(ids.contains(&"e1".to_string()));
1565        assert!(ids.contains(&"s1".to_string()));
1566        // Should not include p1/p2 (those connect via equipRef)
1567        assert!(!ids.contains(&"p1".to_string()));
1568        // Edges should only contain siteRef
1569        assert!(edges.iter().all(|(_, tag, _)| tag == "siteRef"));
1570    }
1571
1572    #[test]
1573    fn neighbors_zero_hops() {
1574        let g = build_hierarchy_graph();
1575        let (entities, edges) = g.neighbors("e1", 0, None);
1576        assert_eq!(entities.len(), 1);
1577        assert!(edges.is_empty());
1578    }
1579
1580    #[test]
1581    fn neighbors_nonexistent_entity() {
1582        let g = build_hierarchy_graph();
1583        let (entities, _) = g.neighbors("nonexistent", 1, None);
1584        assert!(entities.is_empty());
1585    }
1586
1587    #[test]
1588    fn shortest_path_direct() {
1589        let g = build_hierarchy_graph();
1590        let path = g.shortest_path("e1", "s1");
1591        assert_eq!(path, vec!["e1".to_string(), "s1".to_string()]);
1592    }
1593
1594    #[test]
1595    fn shortest_path_two_hops() {
1596        let g = build_hierarchy_graph();
1597        let path = g.shortest_path("p1", "s1");
1598        // p1 -> e1 -> s1
1599        assert_eq!(path.len(), 3);
1600        assert_eq!(path[0], "p1");
1601        assert_eq!(path[2], "s1");
1602    }
1603
1604    #[test]
1605    fn shortest_path_same_node() {
1606        let g = build_hierarchy_graph();
1607        let path = g.shortest_path("s1", "s1");
1608        assert_eq!(path, vec!["s1".to_string()]);
1609    }
1610
1611    #[test]
1612    fn shortest_path_no_connection() {
1613        // s1 and s2 are not connected to each other directly
1614        let g = build_hierarchy_graph();
1615        let path = g.shortest_path("s1", "s2");
1616        // They are disconnected (no edges between them)
1617        assert!(path.is_empty());
1618    }
1619
1620    #[test]
1621    fn shortest_path_nonexistent() {
1622        let g = build_hierarchy_graph();
1623        let path = g.shortest_path("s1", "nonexistent");
1624        assert!(path.is_empty());
1625    }
1626
1627    #[test]
1628    fn subtree_from_site() {
1629        let g = build_hierarchy_graph();
1630        let tree = g.subtree("s1", 10);
1631        // s1 (depth 0), e1 (1), e2 (1), p1 (2), p2 (2), p3 (2)
1632        assert_eq!(tree.len(), 6);
1633        // Root is at depth 0
1634        assert_eq!(tree[0].0.id().unwrap().val, "s1");
1635        assert_eq!(tree[0].1, 0);
1636        // Equips at depth 1
1637        let depth_1: Vec<_> = tree.iter().filter(|(_, d)| *d == 1).collect();
1638        assert_eq!(depth_1.len(), 2);
1639        // Points at depth 2
1640        let depth_2: Vec<_> = tree.iter().filter(|(_, d)| *d == 2).collect();
1641        assert_eq!(depth_2.len(), 3);
1642    }
1643
1644    #[test]
1645    fn subtree_max_depth_1() {
1646        let g = build_hierarchy_graph();
1647        let tree = g.subtree("s1", 1);
1648        // s1 (depth 0), e1 (1), e2 (1) — no points (depth 2)
1649        assert_eq!(tree.len(), 3);
1650    }
1651
1652    #[test]
1653    fn subtree_nonexistent_root() {
1654        let g = build_hierarchy_graph();
1655        let tree = g.subtree("nonexistent", 10);
1656        assert!(tree.is_empty());
1657    }
1658
1659    #[test]
1660    fn subtree_leaf_node() {
1661        let g = build_hierarchy_graph();
1662        let tree = g.subtree("p1", 10);
1663        // p1 has no children referencing it
1664        assert_eq!(tree.len(), 1);
1665        assert_eq!(tree[0].0.id().unwrap().val, "p1");
1666    }
1667}