Skip to main content

panproto_project/
lib.rs

1//! # panproto-project
2//!
3//! Multi-file project assembly via schema coproduct for panproto.
4//!
5//! Orchestrates parsing all files in a project directory into a unified
6//! project-level schema. The project schema is the coproduct (disjoint union)
7//! of per-file schemas, with cross-file edges for imports and type references.
8//!
9//! ## Two-pass approach
10//!
11//! 1. **Parse pass**: For each file, detect language, parse via
12//!    `ParserRegistry`, prefix vertex IDs
13//!    with the file path.
14//! 2. **Resolve pass**: Walk `import` vertices, match against exports
15//!    in other file schemas, emit `imports` edges connecting them.
16//!
17//! ## Coproduct construction
18//!
19//! The schema-level coproduct prefixes each file's vertex names with the file
20//! path. Edges within a file retain their local structure. The result is a
21//! single [`Schema`] spanning the entire project.
22//!
23//! The coproduct is universal: any morphism out of the project schema restricts
24//! to per-file morphisms. This means per-file diffs compose into project-level
25//! diffs automatically.
26
27use std::collections::HashMap;
28use std::path::{Path, PathBuf};
29
30use globset::GlobSet;
31use panproto_parse::ParserRegistry;
32use panproto_protocols::raw_file;
33use panproto_schema::Schema;
34use rustc_hash::FxHashMap;
35
36/// Incremental parsing cache for project assembly.
37pub mod cache;
38
39/// Project manifest (`panproto.toml`) loading and generation.
40pub mod config;
41
42/// Language detection by file extension and package detection.
43pub mod detect;
44
45/// Error types for project assembly.
46pub mod error;
47
48/// Cross-file import resolution.
49pub mod resolve;
50
51pub use config::ProjectConfig;
52pub use detect::DetectedPackage;
53pub use error::ProjectError;
54
/// A parsed project containing a unified schema and per-file metadata.
///
/// This is the value returned by [`ProjectBuilder::build`].
#[derive(Debug, Clone)]
pub struct ProjectSchema {
    /// The unified coproduct schema spanning all files.
    pub schema: Schema,
    /// Mapping from file path to the root vertex IDs belonging to that file.
    ///
    /// As populated by [`ProjectBuilder::build`], each entry lists every
    /// vertex copied from that file: `<path>::<name>` in multi-file
    /// projects, and the schema's own unprefixed names in the single-file
    /// case.
    pub file_map: HashMap<PathBuf, Vec<panproto_gat::Name>>,
    /// Mapping from file path to the protocol used to parse it.
    pub protocol_map: HashMap<PathBuf, String>,
}
65
/// Builder for assembling a multi-file project into a unified schema.
///
/// Files are added one at a time (or by scanning a directory), then assembled
/// into a [`ProjectSchema`] via coproduct construction.
pub struct ProjectBuilder {
    /// The parser registry for all supported languages.
    registry: ParserRegistry,
    /// Per-file parsed schemas, keyed by file path.
    file_schemas: FxHashMap<PathBuf, Schema>,
    /// Per-file protocol names.
    ///
    /// Holds `"raw_file"` for files that were parsed as raw text or binary.
    protocol_map: FxHashMap<PathBuf, String>,
    /// Compiled exclude patterns from config (if any).
    ///
    /// When `None`, directory walking falls back to a hard-coded list of
    /// build/output directory names.
    excludes: Option<GlobSet>,
    /// Per-path protocol overrides from package config.
    ///
    /// Keys are package directories joined onto the base directory; a file
    /// is matched when its path starts with a key.
    protocol_overrides: FxHashMap<PathBuf, String>,
    /// Optional incremental parsing cache for skipping unchanged files.
    cache: Option<cache::FileCache>,
}
84
85impl ProjectBuilder {
86    /// Create a new project builder with the default parser registry.
87    #[must_use]
88    pub fn new() -> Self {
89        Self {
90            registry: ParserRegistry::new(),
91            file_schemas: FxHashMap::default(),
92            protocol_map: FxHashMap::default(),
93            excludes: None,
94            protocol_overrides: FxHashMap::default(),
95            cache: None,
96        }
97    }
98
99    /// Create a new project builder with a custom parser registry.
100    #[must_use]
101    pub fn with_registry(registry: ParserRegistry) -> Self {
102        Self {
103            registry,
104            file_schemas: FxHashMap::default(),
105            protocol_map: FxHashMap::default(),
106            excludes: None,
107            protocol_overrides: FxHashMap::default(),
108            cache: None,
109        }
110    }
111
112    /// Create a project builder configured from a [`ProjectConfig`].
113    ///
114    /// Compiles exclude patterns and builds the per-package protocol override map.
115    ///
116    /// # Errors
117    ///
118    /// Returns `ProjectError::InvalidPattern` if a glob pattern is malformed.
119    pub fn with_config(cfg: &ProjectConfig, base_dir: &Path) -> Result<Self, ProjectError> {
120        let excludes = config::compile_excludes(base_dir, &cfg.workspace.exclude)?;
121        let mut protocol_overrides = FxHashMap::default();
122        for pkg in &cfg.package {
123            if let Some(ref proto) = pkg.protocol {
124                protocol_overrides.insert(base_dir.join(&pkg.path), proto.clone());
125            }
126        }
127        Ok(Self {
128            registry: ParserRegistry::new(),
129            file_schemas: FxHashMap::default(),
130            protocol_map: FxHashMap::default(),
131            excludes: Some(excludes),
132            protocol_overrides,
133            cache: None,
134        })
135    }
136
137    /// Create a project builder configured from a [`ProjectConfig`] with an
138    /// incremental parsing cache.
139    ///
140    /// Behaves like [`with_config`](Self::with_config) but attaches a
141    /// [`FileCache`](cache::FileCache) so that unchanged files are not
142    /// re-parsed.
143    ///
144    /// # Errors
145    ///
146    /// Returns `ProjectError::InvalidPattern` if a glob pattern is malformed.
147    pub fn with_config_and_cache(
148        cfg: &ProjectConfig,
149        base_dir: &Path,
150        file_cache: cache::FileCache,
151    ) -> Result<Self, ProjectError> {
152        let mut builder = Self::with_config(cfg, base_dir)?;
153        builder.cache = Some(file_cache);
154        Ok(builder)
155    }
156
    /// Extract the cache from the builder (e.g., for saving after build).
    ///
    /// The builder is left without a cache afterwards, so a second call
    /// returns `None`.
    ///
    /// Returns `None` if no cache was attached.
    pub const fn take_cache(&mut self) -> Option<cache::FileCache> {
        self.cache.take()
    }
163
164    /// Add a single file to the project.
165    ///
166    /// The file's language is detected from its path. If the language is
167    /// recognized, the file is parsed via tree-sitter. Otherwise, it is
168    /// parsed as a raw file (text or binary).
169    ///
170    /// If a cache is attached and the file's mtime and size match the
171    /// cached entry, the cached schema is used without re-parsing.
172    ///
173    /// # Errors
174    ///
175    /// Returns [`ProjectError::ParseFailed`] if parsing fails.
176    pub fn add_file(&mut self, path: &Path, content: &[u8]) -> Result<(), ProjectError> {
177        // Check cache first.
178        if let Some(ref mut file_cache) = self.cache {
179            if let Some(entry) = file_cache.entries.get(path) {
180                if cache::is_valid(entry, path) {
181                    self.file_schemas
182                        .insert(path.to_owned(), entry.schema.clone());
183                    self.protocol_map
184                        .insert(path.to_owned(), entry.protocol.clone());
185                    return Ok(());
186                }
187            }
188        }
189
190        let path_str = path.display().to_string();
191
192        // Check per-package protocol override first.
193        let override_protocol = self
194            .protocol_overrides
195            .iter()
196            .find(|(pkg_path, _)| path.starts_with(pkg_path))
197            .map(|(_, proto)| proto.clone());
198
199        // Detect language and parse.
200        let (schema, protocol_name) = if let Some(proto) = override_protocol {
201            if let Ok(schema) = self
202                .registry
203                .parse_with_protocol(&proto, content, &path_str)
204            {
205                (schema, proto)
206            } else {
207                // Fall back to raw file if overridden protocol fails.
208                let text = std::str::from_utf8(content).map_err(|e| ProjectError::ParseFailed {
209                    path: path_str.clone(),
210                    reason: format!("UTF-8 decode: {e}"),
211                })?;
212                let schema = raw_file::parse_text(text, &path_str).map_err(|e| {
213                    ProjectError::ParseFailed {
214                        path: path_str.clone(),
215                        reason: e.to_string(),
216                    }
217                })?;
218                (schema, "raw_file".to_owned())
219            }
220        } else if let Some(protocol) = detect::detect_language(path, &self.registry) {
221            if let Ok(schema) = self
222                .registry
223                .parse_with_protocol(protocol, content, &path_str)
224            {
225                (schema, protocol.to_owned())
226            } else {
227                // Fall back to raw file parsing if the language parser fails
228                // (e.g., Kotlin's tree-sitter grammar is ABI-incompatible).
229                let text = std::str::from_utf8(content).map_err(|e| ProjectError::ParseFailed {
230                    path: path_str.clone(),
231                    reason: format!("UTF-8 decode: {e}"),
232                })?;
233                let schema = raw_file::parse_text(text, &path_str).map_err(|e| {
234                    ProjectError::ParseFailed {
235                        path: path_str.clone(),
236                        reason: e.to_string(),
237                    }
238                })?;
239                (schema, "raw_file".to_owned())
240            }
241        } else if detect::is_binary_extension(path) {
242            let schema = raw_file::parse_binary(&path_str, content).map_err(|e| {
243                ProjectError::ParseFailed {
244                    path: path_str.clone(),
245                    reason: e.to_string(),
246                }
247            })?;
248            (schema, "raw_file".to_owned())
249        } else {
250            // Parse as text raw file.
251            let text = std::str::from_utf8(content).map_err(|e| ProjectError::ParseFailed {
252                path: path_str.clone(),
253                reason: format!("UTF-8 decode: {e}"),
254            })?;
255            let schema =
256                raw_file::parse_text(text, &path_str).map_err(|e| ProjectError::ParseFailed {
257                    path: path_str.clone(),
258                    reason: e.to_string(),
259                })?;
260            (schema, "raw_file".to_owned())
261        };
262
263        // Update cache entry for this file.
264        if let Some(ref mut file_cache) = self.cache {
265            let metadata = std::fs::metadata(path).ok();
266            let mtime_secs = metadata
267                .as_ref()
268                .and_then(|m| m.modified().ok())
269                .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
270                .map_or(0, |d| d.as_secs());
271            let size = metadata.map_or(0, |m| m.len());
272            let content_hash = blake3::hash(content).to_string();
273            file_cache.entries.insert(
274                path.to_owned(),
275                cache::CacheEntry {
276                    mtime_secs,
277                    size,
278                    content_hash,
279                    schema: schema.clone(),
280                    protocol: protocol_name.clone(),
281                },
282            );
283        }
284
285        self.file_schemas.insert(path.to_owned(), schema);
286        self.protocol_map.insert(path.to_owned(), protocol_name);
287        Ok(())
288    }
289
290    /// Add all files in a directory (recursively).
291    ///
292    /// Skips hidden directories (starting with `.`) and common build/output
293    /// directories (`target`, `node_modules`, `__pycache__`, `.git`, etc.).
294    ///
295    /// # Errors
296    ///
297    /// Returns [`ProjectError`] if any file fails to read or parse.
298    pub fn add_directory(&mut self, dir: &Path) -> Result<(), ProjectError> {
299        self.walk_directory(dir)
300    }
301
302    /// Recursively walk a directory, adding all files.
303    fn walk_directory(&mut self, dir: &Path) -> Result<(), ProjectError> {
304        let entries = std::fs::read_dir(dir)?;
305
306        for entry in entries {
307            let entry = entry?;
308            let path = entry.path();
309            let file_name = entry.file_name();
310            let name_str = file_name.to_string_lossy();
311
312            // Always skip hidden files/directories.
313            if name_str.starts_with('.') {
314                continue;
315            }
316
317            // Check against compiled excludes (config-driven) or hardcoded defaults.
318            if let Some(ref excludes) = self.excludes {
319                if excludes.is_match(&path) {
320                    continue;
321                }
322            } else if matches!(
323                name_str.as_ref(),
324                "target" | "node_modules" | "__pycache__" | "build" | "dist" | "vendor" | "Pods"
325            ) {
326                continue;
327            }
328
329            if path.is_dir() {
330                self.walk_directory(&path)?;
331            } else if path.is_file() {
332                let content = std::fs::read(&path)?;
333                self.add_file(&path, &content)?;
334            }
335        }
336
337        Ok(())
338    }
339
340    /// Get the number of files added to the builder.
341    #[must_use]
342    pub fn file_count(&self) -> usize {
343        self.file_schemas.len()
344    }
345
    /// Access the per-file parsed schemas without consuming the builder.
    ///
    /// Keys are the paths passed to `add_file`; the schemas are the
    /// individual per-file schemas, not yet prefixed or merged.
    #[must_use]
    pub const fn file_schemas(&self) -> &FxHashMap<PathBuf, Schema> {
        &self.file_schemas
    }
351
    /// Access the per-file protocol names without consuming the builder.
    ///
    /// Keys are the paths passed to `add_file`.
    #[must_use]
    pub const fn protocol_map_ref(&self) -> &FxHashMap<PathBuf, String> {
        &self.protocol_map
    }
357
358    /// Build the project as a Merkle tree of per-file schemas rather
359    /// than a flat coproduct schema.
360    ///
361    /// The tree is stored incrementally in `store`; see
362    /// [`build_project_tree`] for the standalone function form.
363    ///
364    /// # Errors
365    ///
366    /// Returns [`ProjectError::CoproductFailed`] if the underlying
367    /// store rejects an object write.
368    pub fn build_tree<S>(self, store: &mut S) -> Result<ProjectSchemaTree, ProjectError>
369    where
370        S: panproto_vcs::Store,
371    {
372        // Mirror the flat build path: resolve cross-file imports so
373        // the per-file schemas persisted under the tree carry the
374        // same edges the flat coproduct would. Without this step,
375        // tree-built projects silently drop every cross-file import
376        // edge that the flat build path records.
377        let cross_file_edges = resolve_per_file_imports(&self.file_schemas, &self.protocol_map)?;
378        let root_id = build_project_tree(
379            store,
380            &self.file_schemas,
381            &self.protocol_map,
382            &cross_file_edges,
383        )?;
384        let protocol_map: HashMap<PathBuf, String> = self.protocol_map.into_iter().collect();
385        Ok(ProjectSchemaTree {
386            root_id,
387            protocol_map,
388        })
389    }
390
391    /// Build the project schema by constructing the coproduct of all file schemas.
392    ///
393    /// Each file's vertices are prefixed with the file path to ensure uniqueness
394    /// in the coproduct. Edges within a file retain their local structure.
395    ///
396    /// # Errors
397    ///
398    /// Returns [`ProjectError::CoproductFailed`] if construction fails.
399    pub fn build(self) -> Result<ProjectSchema, ProjectError> {
400        if self.file_schemas.is_empty() {
401            return Err(ProjectError::CoproductFailed {
402                reason: "no files added to project".to_owned(),
403            });
404        }
405
406        // For single-file projects, return the schema as-is.
407        if self.file_schemas.len() == 1 {
408            let (path, schema) = self.file_schemas.into_iter().next().ok_or_else(|| {
409                ProjectError::CoproductFailed {
410                    reason: "internal error: empty after length check".to_owned(),
411                }
412            })?;
413
414            let root_vertices: Vec<panproto_gat::Name> = schema.vertices.keys().cloned().collect();
415            let mut file_map = HashMap::new();
416            file_map.insert(path, root_vertices);
417
418            let protocol_map: HashMap<PathBuf, String> = self.protocol_map.into_iter().collect();
419
420            return Ok(ProjectSchema {
421                schema,
422                file_map,
423                protocol_map,
424            });
425        }
426
427        // Multi-file coproduct: build a new schema containing all vertices/edges
428        // from all file schemas, with path-prefixed names.
429        //
430        // We use the "raw_file" protocol for the coproduct since it's the most
431        // permissive (empty obj_kinds = open protocol). The coproduct schema
432        // contains vertices from multiple protocols.
433        let coproduct_protocol = panproto_schema::Protocol {
434            name: "project".into(),
435            schema_theory: "ThProjectSchema".into(),
436            instance_theory: "ThProjectInstance".into(),
437            schema_composition: None,
438            instance_composition: None,
439            edge_rules: vec![],
440            obj_kinds: vec![], // Open protocol.
441            constraint_sorts: vec![],
442            has_order: true,
443            has_coproducts: false,
444            has_recursion: false,
445            has_causal: false,
446            nominal_identity: false,
447            has_defaults: false,
448            has_coercions: false,
449            has_mergers: false,
450            has_policies: false,
451        };
452
453        let mut builder = panproto_schema::SchemaBuilder::new(&coproduct_protocol);
454        let mut file_map: HashMap<PathBuf, Vec<panproto_gat::Name>> = HashMap::new();
455
456        for (path, schema) in &self.file_schemas {
457            let prefix = path.display().to_string();
458            let mut file_vertices = Vec::new();
459
460            // Copy vertices with path prefix.
461            for (name, vertex) in &schema.vertices {
462                let prefixed_name = format!("{prefix}::{name}");
463                builder = builder
464                    .vertex(&prefixed_name, vertex.kind.as_ref(), None)
465                    .map_err(|e| ProjectError::CoproductFailed {
466                        reason: format!("vertex {prefixed_name}: {e}"),
467                    })?;
468                file_vertices.push(panproto_gat::Name::from(prefixed_name.as_str()));
469
470                // Copy constraints.
471                if let Some(constraints) = schema.constraints.get(name) {
472                    for c in constraints {
473                        builder = builder.constraint(&prefixed_name, c.sort.as_ref(), &c.value);
474                    }
475                }
476            }
477
478            // Copy edges with prefixed source and target.
479            for edge in schema.edges.keys() {
480                let prefixed_src = format!("{prefix}::{}", edge.src);
481                let prefixed_tgt = format!("{prefix}::{}", edge.tgt);
482                let edge_name = edge.name.as_ref().map(|n| {
483                    let prefixed = format!("{prefix}::{n}");
484                    prefixed
485                });
486                builder = builder
487                    .edge(
488                        &prefixed_src,
489                        &prefixed_tgt,
490                        edge.kind.as_ref(),
491                        edge_name.as_deref(),
492                    )
493                    .map_err(|e| ProjectError::CoproductFailed {
494                        reason: format!("edge {prefixed_src} -> {prefixed_tgt}: {e}"),
495                    })?;
496            }
497
498            file_map.insert(path.clone(), file_vertices);
499        }
500
501        let mut schema = builder.build().map_err(|e| ProjectError::CoproductFailed {
502            reason: format!("build: {e}"),
503        })?;
504
505        let protocol_map: HashMap<PathBuf, String> = self.protocol_map.into_iter().collect();
506
507        // Resolve cross-file imports using default rules.
508        let rules = resolve::default_rules();
509        let _resolved = resolve::resolve_imports(&mut schema, &file_map, &protocol_map, &rules);
510
511        Ok(ProjectSchema {
512            schema,
513            file_map,
514            protocol_map,
515        })
516    }
517}
518
/// `Default` is equivalent to [`ProjectBuilder::new`].
impl Default for ProjectBuilder {
    /// Create a builder with the default parser registry.
    fn default() -> Self {
        Self::new()
    }
}
524
/// A project schema stored as a Merkle tree of per-file schemas.
///
/// The root [`panproto_vcs::ObjectId`] points at a
/// [`panproto_vcs::SchemaTreeObject`] whose leaves are
/// [`panproto_vcs::FileSchemaObject`]s. Consumers that want the
/// assembled flat [`Schema`] call
/// [`panproto_vcs::assemble_schema`] with this root.
///
/// Returned by [`ProjectBuilder::build_tree`].
#[derive(Debug, Clone)]
pub struct ProjectSchemaTree {
    /// Object ID of the root [`panproto_vcs::SchemaTreeObject`].
    pub root_id: panproto_vcs::ObjectId,
    /// Mapping from file path to the protocol used to parse it.
    pub protocol_map: HashMap<PathBuf, String>,
}
539
540/// Run the cross-file import resolver against the flat coproduct of
541/// the given per-file schemas and bucket the resulting import edges
542/// by the file that owns the importing vertex.
543///
544/// Returned edges carry vertex names already prefixed with their
545/// owning file's project path (`<path>::<name>`), matching the
546/// convention used by [`ProjectBuilder::build`], so that flat
547/// assembly (`panproto_vcs::assemble_schema`) can add them verbatim
548/// without re-prefixing.
549fn resolve_per_file_imports<H1, H2>(
550    file_schemas: &std::collections::HashMap<PathBuf, panproto_schema::Schema, H1>,
551    protocol_map: &std::collections::HashMap<PathBuf, String, H2>,
552) -> Result<std::collections::HashMap<PathBuf, Vec<panproto_schema::Edge>>, ProjectError>
553where
554    H1: std::hash::BuildHasher,
555    H2: std::hash::BuildHasher,
556{
557    if file_schemas.len() <= 1 {
558        return Ok(HashMap::new());
559    }
560
561    // Rebuild the flat coproduct just like `ProjectBuilder::build`
562    // does. The duplicated loop body is what makes the flat and tree
563    // paths agree on edge composition; factoring it out across the
564    // two would tangle `build_tree`'s lifetime with `build`'s.
565    let coproduct_protocol = panproto_schema::Protocol {
566        name: "project".into(),
567        schema_theory: "ThProjectSchema".into(),
568        instance_theory: "ThProjectInstance".into(),
569        schema_composition: None,
570        instance_composition: None,
571        edge_rules: vec![],
572        obj_kinds: vec![],
573        constraint_sorts: vec![],
574        has_order: true,
575        has_coproducts: false,
576        has_recursion: false,
577        has_causal: false,
578        nominal_identity: false,
579        has_defaults: false,
580        has_coercions: false,
581        has_mergers: false,
582        has_policies: false,
583    };
584
585    let mut builder = panproto_schema::SchemaBuilder::new(&coproduct_protocol);
586    let mut file_map: HashMap<PathBuf, Vec<panproto_gat::Name>> = HashMap::new();
587
588    for (path, schema) in file_schemas {
589        let prefix = path.display().to_string();
590        let mut file_vertices = Vec::new();
591        for (name, vertex) in &schema.vertices {
592            let prefixed_name = format!("{prefix}::{name}");
593            builder = builder
594                .vertex(&prefixed_name, vertex.kind.as_ref(), None)
595                .map_err(|e| ProjectError::CoproductFailed {
596                    reason: format!("vertex {prefixed_name}: {e}"),
597                })?;
598            file_vertices.push(panproto_gat::Name::from(prefixed_name.as_str()));
599            if let Some(constraints) = schema.constraints.get(name) {
600                for c in constraints {
601                    builder = builder.constraint(&prefixed_name, c.sort.as_ref(), &c.value);
602                }
603            }
604        }
605        for edge in schema.edges.keys() {
606            let prefixed_src = format!("{prefix}::{}", edge.src);
607            let prefixed_tgt = format!("{prefix}::{}", edge.tgt);
608            let edge_name = edge.name.as_ref().map(|n| format!("{prefix}::{n}"));
609            builder = builder
610                .edge(
611                    &prefixed_src,
612                    &prefixed_tgt,
613                    edge.kind.as_ref(),
614                    edge_name.as_deref(),
615                )
616                .map_err(|e| ProjectError::CoproductFailed {
617                    reason: format!("edge {prefixed_src} -> {prefixed_tgt}: {e}"),
618                })?;
619        }
620        file_map.insert(path.clone(), file_vertices);
621    }
622
623    let mut schema = builder.build().map_err(|e| ProjectError::CoproductFailed {
624        reason: format!("build: {e}"),
625    })?;
626    let protocols: HashMap<PathBuf, String> = protocol_map
627        .iter()
628        .map(|(k, v)| (k.clone(), v.clone()))
629        .collect();
630
631    let before: std::collections::HashSet<panproto_schema::Edge> =
632        schema.edges.keys().cloned().collect();
633    let rules = resolve::default_rules();
634    resolve::resolve_imports(&mut schema, &file_map, &protocols, &rules);
635
636    let new_edges: Vec<panproto_schema::Edge> = schema
637        .edges
638        .keys()
639        .filter(|e| !before.contains(*e))
640        .cloned()
641        .collect();
642    bucket_new_edges(&new_edges, &file_map)
643}
644
645/// Bucket newly synthesized cross-file import edges by the file whose
646/// vertex list contains each edge's `src`. Surfaces
647/// [`ProjectError::OrphanImportEdge`] if any edge's src matches no file.
648fn bucket_new_edges<H>(
649    new_edges: &[panproto_schema::Edge],
650    file_map: &HashMap<PathBuf, Vec<panproto_gat::Name>, H>,
651) -> Result<HashMap<PathBuf, Vec<panproto_schema::Edge>>, ProjectError>
652where
653    H: std::hash::BuildHasher,
654{
655    let mut by_file: HashMap<PathBuf, Vec<panproto_schema::Edge>> = HashMap::new();
656    for edge in new_edges {
657        let Some(owner) = file_map
658            .iter()
659            .find(|(_, verts)| verts.iter().any(|v| v == &edge.src))
660            .map(|(path, _)| path.clone())
661        else {
662            return Err(ProjectError::OrphanImportEdge {
663                src: edge.src.to_string(),
664                tgt: edge.tgt.to_string(),
665            });
666        };
667        by_file.entry(owner).or_default().push(edge.clone());
668    }
669    Ok(by_file)
670}
671
672/// Build a project schema tree and store it in `store`.
673///
674/// Each `(path, schema)` pair in `files` becomes a
675/// [`panproto_vcs::FileSchemaObject`]; the path components induce a
676/// directory hierarchy of
677/// [`panproto_vcs::SchemaTreeObject`]s stored bottom up. The returned
678/// [`panproto_vcs::ObjectId`] is stable across input-order
679/// permutations because sibling entries are sorted lexicographically.
680///
681/// `protocols` carries the per-file protocol names so the stored
682/// leaves can record how each file was parsed; missing entries fall
683/// back to `"raw_file"`. `cross_file_edges` carries the already-
684/// prefixed cross-file import edges rooted at each file; pass an
685/// empty map to skip cross-file resolution.
686///
687/// # Errors
688///
689/// Returns [`ProjectError::CoproductFailed`] if the underlying store
690/// rejects an object write.
691pub fn build_project_tree<S, H1, H2, H3>(
692    store: &mut S,
693    files: &std::collections::HashMap<PathBuf, panproto_schema::Schema, H1>,
694    protocols: &std::collections::HashMap<PathBuf, String, H2>,
695    cross_file_edges: &std::collections::HashMap<PathBuf, Vec<panproto_schema::Edge>, H3>,
696) -> Result<panproto_vcs::ObjectId, ProjectError>
697where
698    S: panproto_vcs::Store,
699    H1: std::hash::BuildHasher,
700    H2: std::hash::BuildHasher,
701    H3: std::hash::BuildHasher,
702{
703    let mut leaves: Vec<(PathBuf, panproto_vcs::FileSchemaObject)> = files
704        .iter()
705        .map(|(path, schema)| {
706            let protocol = protocols
707                .get(path)
708                .cloned()
709                .unwrap_or_else(|| "raw_file".to_owned());
710            let mut cross = cross_file_edges.get(path).cloned().unwrap_or_default();
711            // Canonicalize wire order: sort so that the emitted bytes
712            // are stable across input permutations. `hash_file_schema`
713            // also sorts before hashing, but canonicalizing here makes
714            // the stored wire bytes themselves deterministic.
715            cross.sort();
716            let file = panproto_vcs::FileSchemaObject {
717                path: path.display().to_string(),
718                protocol,
719                schema: schema.clone(),
720                cross_file_edges: cross,
721            };
722            (path.clone(), file)
723        })
724        .collect();
725    leaves.sort_by(|a, b| a.0.cmp(&b.0));
726
727    panproto_vcs::build_schema_tree(store, leaves).map_err(|e| ProjectError::CoproductFailed {
728        reason: format!("build_schema_tree: {e}"),
729    })
730}
731
#[cfg(test)]
// Tests abort on the first unexpected `Err`, so `unwrap` is the intended
// failure mode inside this module.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// TypeScript module exporting `add`; the import target of [`TS_MAIN`].
    const TS_UTILS: &[u8] =
        b"export function add(a: number, b: number): number { return a + b; }\n";

    /// TypeScript entry point that imports `add` from `./utils`.
    const TS_MAIN: &[u8] = b"import { add } from './utils';\nadd(1, 2);\n";

    /// Returns a `ProjectBuilder` holding `files`, added in slice order.
    ///
    /// Each entry is a `(path, contents)` pair. Panics if any file is rejected
    /// by `add_file`.
    fn builder_with(files: &[(&str, &[u8])]) -> ProjectBuilder {
        let mut builder = ProjectBuilder::new();
        for &(path, contents) in files {
            builder.add_file(Path::new(path), contents).unwrap();
        }
        builder
    }

    /// Writes `files` as a project tree into a fresh `MemStore`, then
    /// assembles that tree back into a single flat [`Schema`].
    fn assemble_via_tree(files: &[(&str, &[u8])]) -> Schema {
        let mut store = panproto_vcs::MemStore::new();
        let tree = builder_with(files).build_tree(&mut store).unwrap();
        let proto = panproto_vcs::project_coproduct_protocol();
        panproto_vcs::assemble_schema(&store, &tree.root_id, &proto).unwrap()
    }

    /// A single TypeScript file yields a non-empty schema and exactly one
    /// entry in both the file map and the protocol map.
    #[test]
    fn single_file_project() {
        let builder = builder_with(&[(
            "main.ts",
            b"function hello(): string { return 'Hello'; }",
        )]);
        assert_eq!(builder.file_count(), 1);

        let project = builder.build().unwrap();
        assert!(!project.schema.vertices.is_empty());
        assert_eq!(project.file_map.len(), 1);
        assert_eq!(project.protocol_map.len(), 1);
        assert_eq!(
            project
                .protocol_map
                .get(Path::new("main.ts"))
                .map(String::as_str),
            Some("typescript")
        );
    }

    /// Two TypeScript files are both tracked and contribute vertices to the
    /// combined schema.
    #[test]
    fn multi_file_project() {
        let builder = builder_with(&[
            (
                "src/main.ts",
                b"function main(): void { console.log('hello'); }",
            ),
            (
                "src/utils.ts",
                b"export function add(a: number, b: number): number { return a + b; }",
            ),
        ]);
        assert_eq!(builder.file_count(), 2);

        let project = builder.build().unwrap();
        assert!(project.schema.vertices.len() > 5);
        assert_eq!(project.file_map.len(), 2);
    }

    /// A Markdown file is recorded under the `raw_file` protocol.
    #[test]
    fn raw_file_fallback() {
        let project = builder_with(&[("README.md", b"# Hello\n\nThis is a project.\n")])
            .build()
            .unwrap();

        assert_eq!(
            project
                .protocol_map
                .get(Path::new("README.md"))
                .map(String::as_str),
            Some("raw_file")
        );
    }

    /// Python, Rust and Markdown files in one project are each recorded under
    /// their own protocol.
    #[test]
    fn mixed_languages() {
        let builder = builder_with(&[
            ("main.py", b"def main():\n    print('hello')\n"),
            ("lib.rs", b"pub fn add(a: i32, b: i32) -> i32 { a + b }"),
            ("README.md", b"# Mixed project\n"),
        ]);
        assert_eq!(builder.file_count(), 3);

        let project = builder.build().unwrap();
        assert_eq!(project.file_map.len(), 3);

        let expected = [
            ("main.py", "python"),
            ("lib.rs", "rust"),
            ("README.md", "raw_file"),
        ];
        for (path, protocol) in expected {
            assert_eq!(
                project
                    .protocol_map
                    .get(Path::new(path))
                    .map(String::as_str),
                Some(protocol),
                "unexpected protocol for {path}"
            );
        }
    }

    /// Building a project with no files is an error.
    #[test]
    fn empty_project_errors() {
        assert!(ProjectBuilder::new().build().is_err());
    }

    /// The tree root id must not depend on the order in which files were
    /// added to the builder.
    #[test]
    fn build_tree_stable_across_insertion_order() {
        let root_id_of = |files: &[(&str, &[u8])]| -> panproto_vcs::ObjectId {
            let mut store = panproto_vcs::MemStore::new();
            let tree = builder_with(files).build_tree(&mut store).unwrap();
            tree.root_id
        };

        let a: (&str, &[u8]) = ("src/a.rs", b"pub fn a() {}");
        let b: (&str, &[u8]) = ("src/b.rs", b"pub fn b() {}");

        let forward = root_id_of(&[a, b]);
        let reverse = root_id_of(&[b, a]);
        assert_eq!(forward, reverse);
    }

    /// Two-file TypeScript project with an import: the schema assembled from
    /// the tree must have the same edge count as the flat `build()` result.
    #[test]
    fn build_tree_preserves_cross_file_imports() {
        // One shared fixture, so the flat and tree paths cannot drift apart.
        let files: &[(&str, &[u8])] = &[("src/utils.ts", TS_UTILS), ("src/main.ts", TS_MAIN)];

        let flat = builder_with(files).build().unwrap().schema;
        let assembled = assemble_via_tree(files);

        assert_eq!(
            flat.edges.len(),
            assembled.edges.len(),
            "tree-built project drops edges; cross-file imports are likely missing"
        );
    }

    /// The schema assembled from a stored tree must carry the same vertex and
    /// edge counts as the flat build of the same files.
    #[test]
    fn build_tree_assembles_back_to_flat_schema() {
        let files: &[(&str, &[u8])] = &[("x.rs", b"pub fn x() {}"), ("y.rs", b"pub fn y() {}")];

        let flat = builder_with(files).build().unwrap().schema;
        let assembled = assemble_via_tree(files);

        assert_eq!(flat.vertices.len(), assembled.vertices.len());
        assert_eq!(flat.edges.len(), assembled.edges.len());
    }

    /// Cross-file edges supplied in different `Vec` orders must produce the
    /// same root id and the same serialized `FileSchemaObject` bytes.
    #[test]
    fn cross_file_edges_wire_bytes_are_deterministic() {
        use panproto_gat::Name;
        use panproto_schema::Edge;
        use panproto_vcs::{FileSchemaObject, MemStore, ObjectId};

        // Two synthetic edges rooted at the same vertex; only their order in
        // the input Vec differs between the two builds below.
        let e1 = Edge {
            src: Name::from("src/main.ts::importStmt"),
            tgt: Name::from("src/a.ts::exportA"),
            kind: Name::from("imports"),
            name: None,
        };
        let e2 = Edge {
            src: Name::from("src/main.ts::importStmt"),
            tgt: Name::from("src/b.ts::exportB"),
            kind: Name::from("imports"),
            name: None,
        };

        let tiny = panproto_schema::SchemaBuilder::new(&panproto_schema::Protocol {
            name: "project".into(),
            ..Default::default()
        })
        .vertex("x", "record", None)
        .unwrap()
        .build()
        .unwrap();

        let main_ts = PathBuf::from("src/main.ts");
        let files = HashMap::from([(main_ts.clone(), tiny)]);
        let protocols = HashMap::from([(main_ts.clone(), "typescript".to_owned())]);
        let ce_forward = HashMap::from([(main_ts.clone(), vec![e1.clone(), e2.clone()])]);
        let ce_reverse = HashMap::from([(main_ts, vec![e2, e1])]);

        let mut store_a = MemStore::new();
        let mut store_b = MemStore::new();
        let id_a = build_project_tree(&mut store_a, &files, &protocols, &ce_forward).unwrap();
        let id_b = build_project_tree(&mut store_b, &files, &protocols, &ce_reverse).unwrap();
        assert_eq!(
            id_a, id_b,
            "FileSchemaObject wire order must be deterministic"
        );

        // Stronger check: walk each store and compare the raw serialized
        // bytes of every visited FileSchemaObject. Bytes are accumulated
        // rather than overwritten, so no visited file is left out of the
        // comparison if the fixture ever grows beyond one file.
        let collect_bytes = |store: &MemStore, root: ObjectId| {
            let mut bytes: Vec<u8> = Vec::new();
            panproto_vcs::walk_tree(store, &root, |_, file: &FileSchemaObject| {
                bytes.extend(serde_json::to_vec(file).unwrap());
                Ok(())
            })
            .unwrap();
            bytes
        };
        assert_eq!(collect_bytes(&store_a, id_a), collect_bytes(&store_b, id_b));
    }

    /// An edge whose `src` is not in any file's vertex list must surface as
    /// `ProjectError::OrphanImportEdge` rather than being silently dropped.
    #[test]
    fn orphan_import_edge_is_surfaced() {
        use panproto_gat::Name;
        use panproto_schema::Edge;

        let file_map: HashMap<PathBuf, Vec<Name>> = HashMap::from([(
            PathBuf::from("src/a.ts"),
            vec![Name::from("src/a.ts::real")],
        )]);

        // `src` names a vertex that no file in `file_map` owns.
        let orphan = Edge {
            src: Name::from("unknown::ghost"),
            tgt: Name::from("src/a.ts::real"),
            kind: Name::from("imports"),
            name: None,
        };

        match bucket_new_edges(&[orphan], &file_map).unwrap_err() {
            ProjectError::OrphanImportEdge { src, tgt } => {
                assert!(src.contains("ghost"));
                assert!(tgt.contains("real"));
            }
            other => panic!("expected OrphanImportEdge, got {other:?}"),
        }
    }

    /// Known extensions map to their language; `.md` is not detected.
    #[test]
    fn language_detection() {
        let registry = ParserRegistry::new();

        let cases = [
            ("a.ts", Some("typescript")),
            ("b.py", Some("python")),
            ("c.rs", Some("rust")),
            ("d.md", None),
        ];
        for (file, expected) in cases {
            assert_eq!(
                detect::detect_language(Path::new(file), &registry),
                expected,
                "unexpected language for {file}"
            );
        }
    }
}