Skip to main content

fabryk_graph/
builder.rs

1//! GraphBuilder for constructing knowledge graphs.
2//!
3//! The builder orchestrates content discovery and graph construction:
4//!
5//! 1. Discover content files using glob patterns
6//! 2. Parse frontmatter and content
7//! 3. Call GraphExtractor methods to extract nodes/edges
8//! 4. Build the final GraphData structure
9//!
10//! # Taproot Adaptations
11//!
12//! - **Two-phase build**: Phase 1 creates all nodes, Phase 2 creates all edges.
13//!   This ensures all nodes exist before edge creation, handling forward references.
14//! - **Dangling reference tracking**: Edges referencing missing nodes are logged
15//!   in `BuildStats::dangling_refs` instead of silently dropped.
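//! - **Edge deduplication**: An edge is added at most once per
//!   `(from, to, relationship)` triple, so repeated declarations of the same
//!   edge collapse into one. Opposite directions (`a -> b` and `b -> a`) have
//!   different keys and are both kept.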
18
19use crate::persistence::{self, GraphMetadata};
20use crate::{Edge, EdgeOrigin, GraphData, GraphExtractor, Relationship};
21use fabryk_content::markdown::extract_frontmatter;
22use fabryk_core::{Error, Result};
23use serde::{Deserialize, Serialize};
24use std::collections::HashSet;
25use std::path::{Path, PathBuf};
26
27// ============================================================================
28// Builder configuration types
29// ============================================================================
30
/// Strategy applied when a content file fails to process during a build.
#[derive(Clone, Debug, Default)]
pub enum ErrorHandling {
    /// Abort the build and return the first error encountered (the default).
    #[default]
    FailFast,
    /// Keep going and record each failure in the build statistics.
    Collect,
    /// Skip problematic files and keep going; failures are still recorded in
    /// the build statistics.
    Skip,
}
42
/// A per-file failure captured while building the graph.
#[derive(Clone, Debug)]
pub struct BuildError {
    /// The file whose processing failed.
    pub file: PathBuf,
    /// Human-readable description of the failure.
    pub message: String,
}
51
52/// Manual edge definition loaded from JSON.
53#[derive(Clone, Debug, Serialize, Deserialize)]
54pub struct ManualEdge {
55    /// Source node ID.
56    pub from: String,
57    /// Target node ID.
58    pub to: String,
59    /// Relationship type name.
60    pub relationship: String,
61    /// Optional weight override.
62    pub weight: Option<f32>,
63}
64
65/// Statistics from a graph build operation.
66#[derive(Debug, Clone)]
67pub struct BuildStats {
68    /// Number of nodes created.
69    pub nodes_created: usize,
70    /// Number of edges created.
71    pub edges_created: usize,
72    /// Files that were processed.
73    pub files_processed: usize,
74    /// Files that were skipped due to errors.
75    pub files_skipped: usize,
76    /// Errors encountered (if not fail-fast).
77    pub errors: Vec<BuildError>,
78    /// Manual edges loaded.
79    pub manual_edges_loaded: usize,
80    /// Dangling references (edges to/from missing nodes).
81    pub dangling_refs: Vec<String>,
82    /// Duplicate edges that were deduplicated.
83    pub deduped_edges: usize,
84    /// Whether the result was loaded from cache.
85    pub from_cache: bool,
86}
87
88// ============================================================================
89// GraphBuilder
90// ============================================================================
91
92/// Builder for constructing knowledge graphs.
93///
94/// Generic over `E: GraphExtractor` to support any domain.
95///
96/// # Caching
97///
98/// When a cache path is configured via [`with_cache_path`](Self::with_cache_path),
99/// the builder checks if the cached graph is fresh before rebuilding. On cache hit,
100/// the graph is loaded from disk in milliseconds instead of re-parsing all content files.
101pub struct GraphBuilder<E: GraphExtractor> {
102    extractor: E,
103    content_path: Option<PathBuf>,
104    manual_edges_path: Option<PathBuf>,
105    error_handling: ErrorHandling,
106    cache_path: Option<PathBuf>,
107    skip_cache: bool,
108}
109
110impl<E: GraphExtractor> GraphBuilder<E> {
111    /// Creates a new builder with the given extractor.
112    pub fn new(extractor: E) -> Self {
113        Self {
114            extractor,
115            content_path: None,
116            manual_edges_path: None,
117            error_handling: ErrorHandling::default(),
118            cache_path: None,
119            skip_cache: false,
120        }
121    }
122
123    /// Sets the content directory path.
124    pub fn with_content_path(mut self, path: impl Into<PathBuf>) -> Self {
125        self.content_path = Some(path.into());
126        self
127    }
128
129    /// Adds manual edges from a JSON file (Amendment ยง2f-iii).
130    pub fn with_manual_edges(mut self, path: impl Into<PathBuf>) -> Self {
131        self.manual_edges_path = Some(path.into());
132        self
133    }
134
135    /// Sets the error handling strategy.
136    pub fn with_error_handling(mut self, handling: ErrorHandling) -> Self {
137        self.error_handling = handling;
138        self
139    }
140
141    /// Sets the cache file path for graph persistence.
142    ///
143    /// When set, the builder will:
144    /// 1. Check if the cache is fresh before building (by comparing content hashes)
145    /// 2. Load from cache on hit (fast path)
146    /// 3. Save to cache after a successful build (for next time)
147    pub fn with_cache_path(mut self, path: impl Into<PathBuf>) -> Self {
148        self.cache_path = Some(path.into());
149        self
150    }
151
152    /// Forces a rebuild even if the cache is fresh.
153    pub fn skip_cache(mut self) -> Self {
154        self.skip_cache = true;
155        self
156    }
157
158    /// Builds the graph.
159    ///
160    /// Uses a two-phase approach (adapted from Taproot):
161    /// - Phase 1: Extract and add all nodes
162    /// - Phase 2: Extract and add all edges (with dedup and dangling ref tracking)
163    ///
164    /// If a cache path is configured and the cache is fresh, loads from cache instead.
165    pub async fn build(self) -> Result<(GraphData, BuildStats)> {
166        let content_path = self
167            .content_path
168            .as_ref()
169            .ok_or_else(|| Error::config("Content path not set. Use with_content_path() first."))?
170            .clone();
171
172        // Check cache freshness (if cache configured and not skipped)
173        if let Some(ref cache_path) = self.cache_path
174            && !self.skip_cache
175        {
176            let content_hash = compute_content_hash(&content_path)?;
177            if persistence::is_cache_fresh(cache_path, &content_hash) {
178                log::info!(
179                    "Graph cache is fresh, loading from {}",
180                    cache_path.display()
181                );
182                let graph = persistence::load_graph(cache_path)?;
183                let stats = BuildStats {
184                    nodes_created: graph.node_count(),
185                    edges_created: graph.edge_count(),
186                    files_processed: 0,
187                    files_skipped: 0,
188                    errors: Vec::new(),
189                    manual_edges_loaded: 0,
190                    dangling_refs: Vec::new(),
191                    deduped_edges: 0,
192                    from_cache: true,
193                };
194                return Ok((graph, stats));
195            }
196        }
197
198        // Discover files
199        let files = discover_files(&content_path).await?;
200
201        let mut stats = BuildStats {
202            nodes_created: 0,
203            edges_created: 0,
204            files_processed: 0,
205            files_skipped: 0,
206            errors: Vec::new(),
207            manual_edges_loaded: 0,
208            dangling_refs: Vec::new(),
209            deduped_edges: 0,
210            from_cache: false,
211        };
212
213        let mut graph = GraphData::new();
214
215        // Temporary storage for edge data (processed in phase 2)
216        let mut pending_edges: Vec<(String, E::EdgeData)> = Vec::new();
217
218        // ================================================================
219        // Phase 1: Extract and add all nodes
220        // ================================================================
221        for file_path in &files {
222            match self.process_file(&content_path, file_path) {
223                Ok((node_data, edge_data)) => {
224                    let node = self.extractor.to_graph_node(&node_data);
225                    graph.add_node(node.clone());
226                    stats.nodes_created += 1;
227
228                    if let Some(edges) = edge_data {
229                        pending_edges.push((node.id.clone(), edges));
230                    }
231                }
232                Err(e) => {
233                    let build_error = BuildError {
234                        file: file_path.clone(),
235                        message: e.to_string(),
236                    };
237
238                    match self.error_handling {
239                        ErrorHandling::FailFast => return Err(e),
240                        ErrorHandling::Collect | ErrorHandling::Skip => {
241                            stats.files_skipped += 1;
242                            stats.errors.push(build_error);
243                        }
244                    }
245                }
246            }
247
248            stats.files_processed += 1;
249        }
250
251        // ================================================================
252        // Phase 2: Add all edges (with dedup and dangling ref tracking)
253        // ================================================================
254        let mut seen_edges: HashSet<(String, String, String)> = HashSet::new();
255
256        for (from_id, edge_data) in &pending_edges {
257            let edges = self.extractor.to_graph_edges(from_id, edge_data);
258            for edge in edges {
259                // Check for dangling references
260                if !graph.contains_node(&edge.from) || !graph.contains_node(&edge.to) {
261                    stats.dangling_refs.push(format!(
262                        "{} -[{}]-> {}",
263                        edge.from,
264                        edge.relationship.name(),
265                        edge.to
266                    ));
267                    continue;
268                }
269
270                // Bidirectional edge deduplication
271                let edge_key = (
272                    edge.from.clone(),
273                    edge.to.clone(),
274                    edge.relationship.name().to_string(),
275                );
276                if !seen_edges.insert(edge_key) {
277                    stats.deduped_edges += 1;
278                    continue;
279                }
280
281                if graph.add_edge(edge).is_ok() {
282                    stats.edges_created += 1;
283                }
284            }
285        }
286
287        // ================================================================
288        // Phase 3: Load manual edges
289        // ================================================================
290        if let Some(ref manual_path) = self.manual_edges_path {
291            stats.manual_edges_loaded =
292                load_manual_edges(manual_path, &mut graph, &mut seen_edges, &mut stats)?;
293        }
294
295        // Save to cache after successful build
296        if let Some(ref cache_path) = self.cache_path {
297            let content_hash = compute_content_hash(&content_path)?;
298            let metadata = GraphMetadata {
299                content_hash: Some(content_hash),
300                source_file_count: Some(stats.files_processed),
301                ..Default::default()
302            };
303            // Ensure parent directory exists
304            if let Some(parent) = cache_path.parent()
305                && !parent.exists()
306            {
307                std::fs::create_dir_all(parent).map_err(|e| Error::io_with_path(e, parent))?;
308            }
309            if let Err(e) = persistence::save_graph(&graph, cache_path, Some(metadata)) {
310                log::warn!("Failed to save graph cache: {e}");
311            }
312        }
313
314        Ok((graph, stats))
315    }
316
317    /// Process a single file to extract node and edge data.
318    fn process_file(
319        &self,
320        base_path: &Path,
321        file_path: &Path,
322    ) -> Result<(E::NodeData, Option<E::EdgeData>)> {
323        let content =
324            std::fs::read_to_string(file_path).map_err(|e| Error::io_with_path(e, file_path))?;
325
326        let fm_result = extract_frontmatter(&content)?;
327
328        let frontmatter = fm_result
329            .value()
330            .cloned()
331            .unwrap_or(yaml_serde::Value::Null);
332        let body = fm_result.body();
333
334        let node_data = self
335            .extractor
336            .extract_node(base_path, file_path, &frontmatter, body)?;
337
338        let edge_data = self.extractor.extract_edges(&frontmatter, body)?;
339
340        Ok((node_data, edge_data))
341    }
342}
343
344// ============================================================================
345// Helper functions
346// ============================================================================
347
348/// Parse a relationship string to Relationship enum.
349fn parse_relationship(s: &str) -> Relationship {
350    match s.to_lowercase().as_str() {
351        "prerequisite" | "prereq" => Relationship::Prerequisite,
352        "leads_to" | "leadsto" => Relationship::LeadsTo,
353        "relates_to" | "relatesto" | "related" => Relationship::RelatesTo,
354        "extends" => Relationship::Extends,
355        "introduces" => Relationship::Introduces,
356        "covers" => Relationship::Covers,
357        "variant_of" | "variantof" => Relationship::VariantOf,
358        "contrasts_with" | "contrastswith" => Relationship::ContrastsWith,
359        "answers_question" | "answersquestion" | "answers_questions" => {
360            Relationship::AnswersQuestion
361        }
362        other => Relationship::Custom(other.to_string()),
363    }
364}
365
366/// Load manual edges from a JSON file.
367fn load_manual_edges(
368    path: &Path,
369    graph: &mut GraphData,
370    seen_edges: &mut HashSet<(String, String, String)>,
371    stats: &mut BuildStats,
372) -> Result<usize> {
373    if !path.exists() {
374        return Ok(0);
375    }
376
377    let json = std::fs::read_to_string(path).map_err(|e| Error::io_with_path(e, path))?;
378
379    let manual_edges: Vec<ManualEdge> = serde_json::from_str(&json)
380        .map_err(|e| Error::parse(format!("Failed to parse manual edges: {e}")))?;
381
382    let mut loaded = 0;
383    for manual in manual_edges {
384        if !graph.contains_node(&manual.from) || !graph.contains_node(&manual.to) {
385            stats.dangling_refs.push(format!(
386                "manual: {} -[{}]-> {}",
387                manual.from, manual.relationship, manual.to
388            ));
389            continue;
390        }
391
392        let edge_key = (
393            manual.from.clone(),
394            manual.to.clone(),
395            manual.relationship.clone(),
396        );
397        if !seen_edges.insert(edge_key) {
398            stats.deduped_edges += 1;
399            continue;
400        }
401
402        let relationship = parse_relationship(&manual.relationship);
403        let weight = manual
404            .weight
405            .unwrap_or_else(|| relationship.default_weight());
406
407        let edge = Edge {
408            from: manual.from,
409            to: manual.to,
410            relationship,
411            weight,
412            origin: EdgeOrigin::Manual,
413        };
414
415        if graph.add_edge(edge).is_ok() {
416            loaded += 1;
417        }
418    }
419
420    Ok(loaded)
421}
422
423/// Compute a content hash for cache freshness checking.
424///
425/// Uses file paths and modification times (not content) for speed.
426/// Deterministic: sorted paths ensure consistent hashing.
427fn compute_content_hash(dir: &Path) -> Result<String> {
428    use std::collections::hash_map::DefaultHasher;
429    use std::hash::{Hash, Hasher};
430
431    let mut hasher = DefaultHasher::new();
432    let mut file_info: Vec<(String, u64)> = Vec::new();
433
434    fn collect_files(dir: &Path, base: &Path, file_info: &mut Vec<(String, u64)>) -> Result<()> {
435        for entry in std::fs::read_dir(dir).map_err(|e| Error::io_with_path(e, dir))? {
436            let entry = entry.map_err(Error::io)?;
437            let path = entry.path();
438            if path.is_dir() {
439                collect_files(&path, base, file_info)?;
440            } else if path.extension().is_some_and(|e| e == "md") {
441                let relative = path
442                    .strip_prefix(base)
443                    .unwrap_or(&path)
444                    .to_string_lossy()
445                    .to_string();
446                let mtime = std::fs::metadata(&path)
447                    .ok()
448                    .and_then(|m| m.modified().ok())
449                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
450                    .map(|d| d.as_secs())
451                    .unwrap_or(0);
452                file_info.push((relative, mtime));
453            }
454        }
455        Ok(())
456    }
457
458    collect_files(dir, dir, &mut file_info)?;
459    file_info.sort_by(|a, b| a.0.cmp(&b.0));
460
461    for (path, mtime) in &file_info {
462        path.hash(&mut hasher);
463        mtime.hash(&mut hasher);
464    }
465
466    Ok(format!("{:016x}", hasher.finish()))
467}
468
469/// Discover markdown content files in a directory.
470async fn discover_files(base_path: &Path) -> Result<Vec<PathBuf>> {
471    use fabryk_core::util::files::{FindOptions, find_all_files};
472
473    let files = find_all_files(base_path, FindOptions::markdown()).await?;
474    let paths: Vec<PathBuf> = files.into_iter().map(|f| f.path).collect();
475
476    Ok(paths)
477}
478
479// ============================================================================
480// Tests
481// ============================================================================
482
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Relationship;
    use crate::extractor::mock::MockExtractor;
    use tempfile::tempdir;

    /// Creates a temp dir containing an empty subdirectory called `name`.
    ///
    /// The `TempDir` guard is returned so the directory outlives the test body.
    fn make_content_dir(name: &str) -> (tempfile::TempDir, PathBuf) {
        let dir = tempdir().unwrap();
        let content_dir = dir.path().join(name);
        std::fs::create_dir(&content_dir).unwrap();
        (dir, content_dir)
    }

    /// Creates a content dir with two concepts; `concept-a` lists `concept-b`
    /// as a prerequisite.
    fn setup_test_files() -> (tempfile::TempDir, PathBuf) {
        let (dir, content_dir) = make_content_dir("content");

        let file_a = "---\ntitle: \"Concept A\"\ncategory: \"basics\"\nprerequisites:\n  - concept-b\n---\n\n# Concept A\n\nContent here.\n";
        let file_b = "---\ntitle: \"Concept B\"\ncategory: \"fundamentals\"\n---\n\n# Concept B\n\nFoundation content.\n";

        std::fs::write(content_dir.join("concept-a.md"), file_a).unwrap();
        std::fs::write(content_dir.join("concept-b.md"), file_b).unwrap();

        (dir, content_dir)
    }

    #[tokio::test]
    async fn test_builder_basic() {
        let (_dir, content_dir) = setup_test_files();

        let (graph, stats) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .build()
            .await
            .unwrap();

        assert_eq!(stats.files_processed, 2);
        assert_eq!(graph.node_count(), 2);
        assert!(graph.contains_node("concept-a"));
        assert!(graph.contains_node("concept-b"));
    }

    #[tokio::test]
    async fn test_builder_extracts_edges() {
        let (_dir, content_dir) = setup_test_files();

        let (graph, stats) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .build()
            .await
            .unwrap();

        // concept-a has prerequisite concept-b
        assert!(graph.edge_count() >= 1);
        assert!(stats.edges_created >= 1);
    }

    #[tokio::test]
    async fn test_builder_manual_edges() {
        let (_dir, content_dir) = setup_test_files();
        let manual_edges_path = content_dir.parent().unwrap().join("manual_edges.json");

        let manual_edges = r#"[
            {"from": "concept-a", "to": "concept-b", "relationship": "relates_to", "weight": 0.9}
        ]"#;
        std::fs::write(&manual_edges_path, manual_edges).unwrap();

        let (_graph, stats) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .with_manual_edges(&manual_edges_path)
            .build()
            .await
            .unwrap();

        assert_eq!(stats.manual_edges_loaded, 1);
    }

    #[tokio::test]
    async fn test_builder_error_handling_collect() {
        let (_dir, content_dir) = make_content_dir("content");

        std::fs::write(
            content_dir.join("valid.md"),
            "---\ntitle: Valid\n---\nContent",
        )
        .unwrap();
        std::fs::write(content_dir.join("invalid.md"), "not yaml frontmatter").unwrap();

        let (_graph, stats) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .with_error_handling(ErrorHandling::Collect)
            .build()
            .await
            .unwrap();

        // Both files are visited whether or not they parse.
        assert_eq!(stats.files_processed, 2);
        // invalid.md has no frontmatter delimiters, so whether it yields a
        // node or an error depends on the extractor. Either way the
        // accounting must add up: every visited file produced a node or was
        // skipped, and every skipped file has a recorded error.
        assert_eq!(stats.files_skipped, stats.errors.len());
        assert_eq!(
            stats.nodes_created + stats.files_skipped,
            stats.files_processed
        );
    }

    #[tokio::test]
    async fn test_builder_missing_content_path() {
        let result = GraphBuilder::new(MockExtractor).build().await;
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_builder_dangling_refs() {
        let (_dir, content_dir) = make_content_dir("content");

        // File references a non-existent prerequisite
        let file = "---\ntitle: \"Orphan\"\nprerequisites:\n  - nonexistent\n---\n\n# Orphan\n";
        std::fs::write(content_dir.join("orphan.md"), file).unwrap();

        let (_graph, stats) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .build()
            .await
            .unwrap();

        assert_eq!(stats.nodes_created, 1);
        assert!(!stats.dangling_refs.is_empty());
        assert!(stats.dangling_refs[0].contains("nonexistent"));
    }

    #[tokio::test]
    async fn test_builder_edge_dedup() {
        let (_dir, content_dir) = make_content_dir("content");

        // Both files reference each other as related
        let file_a = "---\ntitle: \"A\"\nrelated:\n  - b\n---\n\n# A\n";
        let file_b = "---\ntitle: \"B\"\nrelated:\n  - a\n---\n\n# B\n";

        std::fs::write(content_dir.join("a.md"), file_a).unwrap();
        std::fs::write(content_dir.join("b.md"), file_b).unwrap();

        let (graph, stats) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .build()
            .await
            .unwrap();

        // Should have 2 nodes
        assert_eq!(graph.node_count(), 2);
        assert_eq!(stats.nodes_created, 2);
        // Dedup keys include the direction, so a->b and b->a are distinct
        // edges and both survive.
        assert_eq!(graph.edge_count(), 2);
        assert_eq!(stats.edges_created, 2);
    }

    #[tokio::test]
    async fn test_builder_empty_directory() {
        let (_dir, content_dir) = make_content_dir("empty");

        let (graph, stats) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .build()
            .await
            .unwrap();

        assert_eq!(graph.node_count(), 0);
        assert_eq!(stats.files_processed, 0);
    }

    #[test]
    fn test_parse_relationship() {
        assert_eq!(
            parse_relationship("prerequisite"),
            Relationship::Prerequisite
        );
        assert_eq!(parse_relationship("prereq"), Relationship::Prerequisite);
        assert_eq!(parse_relationship("leads_to"), Relationship::LeadsTo);
        assert_eq!(parse_relationship("leadsto"), Relationship::LeadsTo);
        assert_eq!(parse_relationship("relates_to"), Relationship::RelatesTo);
        assert_eq!(parse_relationship("relatesto"), Relationship::RelatesTo);
        assert_eq!(parse_relationship("related"), Relationship::RelatesTo);
        assert_eq!(parse_relationship("extends"), Relationship::Extends);
        assert_eq!(parse_relationship("introduces"), Relationship::Introduces);
        assert_eq!(parse_relationship("covers"), Relationship::Covers);
        assert_eq!(parse_relationship("variant_of"), Relationship::VariantOf);
        assert_eq!(parse_relationship("variantof"), Relationship::VariantOf);
        assert_eq!(
            parse_relationship("contrasts_with"),
            Relationship::ContrastsWith
        );
        assert_eq!(
            parse_relationship("contrastswith"),
            Relationship::ContrastsWith
        );
        assert_eq!(
            parse_relationship("answers_question"),
            Relationship::AnswersQuestion
        );
        assert_eq!(
            parse_relationship("answers_questions"),
            Relationship::AnswersQuestion
        );
        assert_eq!(
            parse_relationship("custom_rel"),
            Relationship::Custom("custom_rel".to_string())
        );
    }

    #[test]
    fn test_parse_relationship_is_case_insensitive() {
        assert_eq!(parse_relationship("PREREQ"), Relationship::Prerequisite);
        assert_eq!(parse_relationship("Relates_To"), Relationship::RelatesTo);
        // Unknown names are lowercased before being wrapped as custom.
        assert_eq!(
            parse_relationship("Custom_Rel"),
            Relationship::Custom("custom_rel".to_string())
        );
    }

    #[tokio::test]
    async fn test_builder_manual_edges_missing_file() {
        let (_dir, content_dir) = setup_test_files();
        let missing_path = content_dir.parent().unwrap().join("nonexistent.json");

        let (_graph, stats) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .with_manual_edges(&missing_path)
            .build()
            .await
            .unwrap();

        // Missing manual edges file should be silently skipped
        assert_eq!(stats.manual_edges_loaded, 0);
    }

    #[tokio::test]
    async fn test_builder_manual_edges_dangling() {
        let (_dir, content_dir) = setup_test_files();
        let manual_path = content_dir.parent().unwrap().join("manual.json");

        let manual = r#"[
            {"from": "concept-a", "to": "nonexistent", "relationship": "relates_to"}
        ]"#;
        std::fs::write(&manual_path, manual).unwrap();

        let (_graph, stats) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .with_manual_edges(&manual_path)
            .build()
            .await
            .unwrap();

        assert_eq!(stats.manual_edges_loaded, 0);
        assert!(
            stats
                .dangling_refs
                .iter()
                .any(|r| r.contains("nonexistent"))
        );
    }

    // ================================================================
    // Cache tests
    // ================================================================

    #[tokio::test]
    async fn test_builder_cache_hit() {
        let (_dir, content_dir) = setup_test_files();
        let cache_path = content_dir.parent().unwrap().join("graph-cache.json");

        // First build: cold (no cache)
        let (graph1, stats1) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .with_cache_path(&cache_path)
            .build()
            .await
            .unwrap();
        assert!(!stats1.from_cache);
        assert!(cache_path.exists());

        // Second build: warm (cache hit)
        let (graph2, stats2) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .with_cache_path(&cache_path)
            .build()
            .await
            .unwrap();
        assert!(stats2.from_cache);
        assert_eq!(graph1.node_count(), graph2.node_count());
        assert_eq!(graph1.edge_count(), graph2.edge_count());
    }

    #[tokio::test]
    async fn test_builder_cache_miss_on_content_change() {
        let (_dir, content_dir) = setup_test_files();
        let cache_path = content_dir.parent().unwrap().join("graph-cache.json");

        // First build
        let (_graph, stats1) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .with_cache_path(&cache_path)
            .build()
            .await
            .unwrap();
        assert!(!stats1.from_cache);

        // Add a new file (changes content hash)
        let file_c = "---\ntitle: \"Concept C\"\ncategory: \"new\"\n---\n\n# Concept C\n";
        std::fs::write(content_dir.join("concept-c.md"), file_c).unwrap();

        // Second build: cache miss (content changed)
        let (graph, stats2) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .with_cache_path(&cache_path)
            .build()
            .await
            .unwrap();
        assert!(!stats2.from_cache);
        assert_eq!(graph.node_count(), 3);
    }

    #[tokio::test]
    async fn test_builder_skip_cache() {
        let (_dir, content_dir) = setup_test_files();
        let cache_path = content_dir.parent().unwrap().join("graph-cache.json");

        // First build: populates cache
        GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .with_cache_path(&cache_path)
            .build()
            .await
            .unwrap();

        // Second build with skip_cache: forces rebuild
        let (_graph, stats) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .with_cache_path(&cache_path)
            .skip_cache()
            .build()
            .await
            .unwrap();
        assert!(!stats.from_cache);
        assert_eq!(stats.files_processed, 2);
    }

    #[tokio::test]
    async fn test_builder_no_cache_path() {
        let (_dir, content_dir) = setup_test_files();

        // Build without cache: same behavior as before
        let (_graph, stats) = GraphBuilder::new(MockExtractor)
            .with_content_path(&content_dir)
            .build()
            .await
            .unwrap();
        assert!(!stats.from_cache);
        assert_eq!(stats.files_processed, 2);
    }

    #[test]
    fn test_compute_content_hash_deterministic() {
        let (_dir, content_dir) = make_content_dir("content");
        std::fs::write(content_dir.join("a.md"), "content a").unwrap();
        std::fs::write(content_dir.join("b.md"), "content b").unwrap();

        let hash1 = compute_content_hash(&content_dir).unwrap();
        let hash2 = compute_content_hash(&content_dir).unwrap();
        assert_eq!(hash1, hash2);
    }

    #[test]
    fn test_compute_content_hash_changes() {
        let (_dir, content_dir) = make_content_dir("content");
        std::fs::write(content_dir.join("a.md"), "content a").unwrap();

        let hash1 = compute_content_hash(&content_dir).unwrap();

        std::fs::write(content_dir.join("b.md"), "content b").unwrap();

        let hash2 = compute_content_hash(&content_dir).unwrap();
        assert_ne!(hash1, hash2);
    }
}