Skip to main content

seqc/
resolver.rs

1//! Include Resolver for Seq
2//!
3//! Resolves include statements, loads and parses included files,
4//! and merges everything into a single Program.
5//!
6//! Supports:
7//! - `include std:name` - loads from embedded stdlib (or filesystem fallback)
8//! - `include ffi:name` - loads FFI manifest (collected but not processed here)
9//! - `include "path"` - loads relative to current file
10
11use crate::ast::{Include, Program, SourceLocation, UnionDef, WordDef};
12use crate::parser::Parser;
13use crate::stdlib_embed;
14use std::collections::{HashMap, HashSet};
15use std::path::{Path, PathBuf};
16
17/// Result of resolving includes
18pub struct ResolveResult {
19    /// The resolved program with all includes merged
20    pub program: Program,
21    /// FFI library names that were included (e.g., ["readline"])
22    pub ffi_includes: Vec<String>,
23    /// Filesystem .seq files that were included (for cache invalidation)
24    pub source_files: Vec<PathBuf>,
25    /// Embedded stdlib modules that were included
26    pub embedded_modules: Vec<String>,
27}
28
29/// Words and unions collected from a resolved include
30struct ResolvedContent {
31    words: Vec<WordDef>,
32    unions: Vec<UnionDef>,
33}
34
/// Where an include's source text comes from once it has been located
#[derive(Debug)]
enum ResolvedInclude {
    /// Module served from the embedded stdlib: (module name, module source text)
    Embedded(String, &'static str),
    /// Module that lives on disk at the given path
    FilePath(PathBuf),
}
43
/// Stateful resolver that tracks what has already been included
pub struct Resolver {
    /// Canonical paths of files already included, so each file is merged at most once
    included_files: HashSet<PathBuf>,
    /// Names of embedded stdlib modules already included
    included_embedded: HashSet<String>,
    /// Optional stdlib directory, consulted only when a module is not embedded
    stdlib_path: Option<PathBuf>,
    /// FFI library names collected so far
    ffi_includes: Vec<String>,
}
55
56impl Resolver {
57    /// Create a new resolver with an optional stdlib path for filesystem fallback
58    pub fn new(stdlib_path: Option<PathBuf>) -> Self {
59        Resolver {
60            included_files: HashSet::new(),
61            included_embedded: HashSet::new(),
62            stdlib_path,
63            ffi_includes: Vec::new(),
64        }
65    }
66
67    /// Resolve all includes in a program and return a merged program with FFI includes
68    ///
69    /// Takes the source file path and its already-parsed program.
70    /// Recursively resolves includes and merges all word and union definitions.
71    /// FFI includes are collected but not processed (they don't produce words/unions).
72    pub fn resolve(
73        &mut self,
74        source_path: &Path,
75        program: Program,
76    ) -> Result<ResolveResult, String> {
77        let source_path = source_path
78            .canonicalize()
79            .map_err(|e| format!("Failed to canonicalize {}: {}", source_path.display(), e))?;
80
81        // Mark this file as included
82        self.included_files.insert(source_path.clone());
83
84        let source_dir = source_path.parent().unwrap_or(Path::new("."));
85        let mut all_words = Vec::new();
86        let mut all_unions = Vec::new();
87
88        for mut word in program.words {
89            // Update source location with file path
90            if let Some(ref mut source) = word.source {
91                source.file = source_path.clone();
92            } else {
93                word.source = Some(SourceLocation::new(source_path.clone(), 0));
94            }
95            all_words.push(word);
96        }
97
98        for mut union_def in program.unions {
99            // Update source location with file path
100            if let Some(ref mut source) = union_def.source {
101                source.file = source_path.clone();
102            } else {
103                union_def.source = Some(SourceLocation::new(source_path.clone(), 0));
104            }
105            all_unions.push(union_def);
106        }
107
108        // Process includes
109        for include in &program.includes {
110            let content = self.process_include(include, source_dir)?;
111            all_words.extend(content.words);
112            all_unions.extend(content.unions);
113        }
114
115        let resolved_program = Program {
116            includes: Vec::new(), // Includes are resolved, no longer needed
117            unions: all_unions,
118            words: all_words,
119        };
120
121        // Note: Constructor generation is done in lib.rs after resolution
122        // to keep all constructor generation in one place
123
124        Ok(ResolveResult {
125            program: resolved_program,
126            ffi_includes: std::mem::take(&mut self.ffi_includes),
127            source_files: self.included_files.iter().cloned().collect(),
128            embedded_modules: self.included_embedded.iter().cloned().collect(),
129        })
130    }
131
132    /// Process a single include and return the resolved words and unions
133    fn process_include(
134        &mut self,
135        include: &Include,
136        source_dir: &Path,
137    ) -> Result<ResolvedContent, String> {
138        // Handle FFI includes specially - they don't produce words/unions,
139        // they're collected for later processing by the FFI system
140        if let Include::Ffi(name) = include {
141            // Check if we have the FFI manifest
142            if !crate::ffi::has_ffi_manifest(name) {
143                return Err(format!(
144                    "FFI library '{}' not found. Available: {}",
145                    name,
146                    crate::ffi::list_ffi_manifests().join(", ")
147                ));
148            }
149            // Avoid duplicate FFI includes
150            if !self.ffi_includes.contains(name) {
151                self.ffi_includes.push(name.clone());
152            }
153            // FFI includes don't add words/unions directly
154            return Ok(ResolvedContent {
155                words: Vec::new(),
156                unions: Vec::new(),
157            });
158        }
159
160        let resolved = self.resolve_include(include, source_dir)?;
161
162        match resolved {
163            ResolvedInclude::Embedded(name, content) => {
164                self.process_embedded_include(&name, content, source_dir)
165            }
166            ResolvedInclude::FilePath(path) => self.process_file_include(&path),
167        }
168    }
169
170    /// Process an embedded stdlib include
171    fn process_embedded_include(
172        &mut self,
173        name: &str,
174        content: &str,
175        source_dir: &Path,
176    ) -> Result<ResolvedContent, String> {
177        // Skip if already included
178        if self.included_embedded.contains(name) {
179            return Ok(ResolvedContent {
180                words: Vec::new(),
181                unions: Vec::new(),
182            });
183        }
184        self.included_embedded.insert(name.to_string());
185
186        // Parse the embedded content
187        let mut parser = Parser::new(content);
188        let included_program = parser
189            .parse()
190            .map_err(|e| format!("Failed to parse embedded module '{}': {}", name, e))?;
191
192        // Create a pseudo-path for source locations
193        let pseudo_path = PathBuf::from(format!("<stdlib:{}>", name));
194
195        // Collect words with updated source locations
196        let mut all_words = Vec::new();
197        for mut word in included_program.words {
198            if let Some(ref mut source) = word.source {
199                source.file = pseudo_path.clone();
200            } else {
201                word.source = Some(SourceLocation::new(pseudo_path.clone(), 0));
202            }
203            all_words.push(word);
204        }
205
206        // Collect unions with updated source locations
207        let mut all_unions = Vec::new();
208        for mut union_def in included_program.unions {
209            if let Some(ref mut source) = union_def.source {
210                source.file = pseudo_path.clone();
211            } else {
212                union_def.source = Some(SourceLocation::new(pseudo_path.clone(), 0));
213            }
214            all_unions.push(union_def);
215        }
216
217        // Recursively process includes from embedded module
218        for include in &included_program.includes {
219            let content = self.process_include(include, source_dir)?;
220            all_words.extend(content.words);
221            all_unions.extend(content.unions);
222        }
223
224        Ok(ResolvedContent {
225            words: all_words,
226            unions: all_unions,
227        })
228    }
229
230    /// Process a filesystem include
231    fn process_file_include(&mut self, path: &Path) -> Result<ResolvedContent, String> {
232        // Skip if already included (prevents diamond dependency issues)
233        let canonical = path
234            .canonicalize()
235            .map_err(|e| format!("Failed to canonicalize {}: {}", path.display(), e))?;
236
237        if self.included_files.contains(&canonical) {
238            return Ok(ResolvedContent {
239                words: Vec::new(),
240                unions: Vec::new(),
241            });
242        }
243
244        // Read and parse the included file
245        let content = std::fs::read_to_string(path)
246            .map_err(|e| format!("Failed to read {}: {}", path.display(), e))?;
247
248        let mut parser = Parser::new(&content);
249        let included_program = parser.parse()?;
250
251        // Recursively resolve includes in the included file
252        let resolved = self.resolve(path, included_program)?;
253
254        Ok(ResolvedContent {
255            words: resolved.program.words,
256            unions: resolved.program.unions,
257        })
258    }
259
260    /// Resolve an include to either embedded content or a file path
261    fn resolve_include(
262        &self,
263        include: &Include,
264        source_dir: &Path,
265    ) -> Result<ResolvedInclude, String> {
266        match include {
267            Include::Std(name) => {
268                // Check embedded stdlib first
269                if let Some(content) = stdlib_embed::get_stdlib(name) {
270                    return Ok(ResolvedInclude::Embedded(name.clone(), content));
271                }
272
273                // Fall back to filesystem if stdlib_path is available
274                if let Some(ref stdlib_path) = self.stdlib_path {
275                    let path = stdlib_path.join(format!("{}.seq", name));
276                    if path.exists() {
277                        return Ok(ResolvedInclude::FilePath(path));
278                    }
279                }
280
281                // Not found anywhere
282                Err(format!(
283                    "Standard library module '{}' not found (not embedded{})",
284                    name,
285                    if self.stdlib_path.is_some() {
286                        " and not in stdlib directory"
287                    } else {
288                        ""
289                    }
290                ))
291            }
292            Include::Relative(rel_path) => Ok(ResolvedInclude::FilePath(
293                self.resolve_relative_path(rel_path, source_dir)?,
294            )),
295            Include::Ffi(_) => {
296                // FFI includes are handled separately in process_include
297                unreachable!("FFI includes should be handled before resolve_include is called")
298            }
299        }
300    }
301
302    /// Resolve a relative include path to a file path
303    ///
304    /// Paths can contain `..` to reference parent directories, but the resolved
305    /// path must stay within the project root (main source file's directory).
306    fn resolve_relative_path(&self, rel_path: &str, source_dir: &Path) -> Result<PathBuf, String> {
307        // Validate non-empty path
308        if rel_path.is_empty() {
309            return Err("Include path cannot be empty".to_string());
310        }
311
312        // Cross-platform absolute path detection
313        let rel_as_path = std::path::Path::new(rel_path);
314        if rel_as_path.is_absolute() {
315            return Err(format!(
316                "Include path '{}' is invalid: paths cannot be absolute",
317                rel_path
318            ));
319        }
320
321        let path = source_dir.join(format!("{}.seq", rel_path));
322        if !path.exists() {
323            return Err(format!(
324                "Include file '{}' not found at {}",
325                rel_path,
326                path.display()
327            ));
328        }
329
330        // Canonicalize to resolve symlinks and normalize the path
331        let canonical_path = path
332            .canonicalize()
333            .map_err(|e| format!("Failed to resolve include path '{}': {}", rel_path, e))?;
334
335        Ok(canonical_path)
336    }
337}
338
339/// Check for word name collisions across all definitions
340///
341/// Returns an error with helpful message if any word is defined multiple times.
342pub fn check_collisions(words: &[WordDef]) -> Result<(), String> {
343    let mut definitions: HashMap<&str, Vec<&SourceLocation>> = HashMap::new();
344
345    for word in words {
346        if let Some(ref source) = word.source {
347            definitions.entry(&word.name).or_default().push(source);
348        }
349    }
350
351    // Find collisions (words defined in multiple places)
352    let mut errors = Vec::new();
353    for (name, locations) in definitions {
354        if locations.len() > 1 {
355            let mut msg = format!("Word '{}' is defined multiple times:\n", name);
356            for loc in &locations {
357                msg.push_str(&format!("  - {}\n", loc));
358            }
359            msg.push_str("\nHint: Rename one of the definitions to avoid collision.");
360            errors.push(msg);
361        }
362    }
363
364    if errors.is_empty() {
365        Ok(())
366    } else {
367        Err(errors.join("\n\n"))
368    }
369}
370
371/// Check for union name collisions across all definitions
372///
373/// Returns an error with helpful message if any union is defined multiple times.
374pub fn check_union_collisions(unions: &[UnionDef]) -> Result<(), String> {
375    let mut definitions: HashMap<&str, Vec<&SourceLocation>> = HashMap::new();
376
377    for union_def in unions {
378        if let Some(ref source) = union_def.source {
379            definitions.entry(&union_def.name).or_default().push(source);
380        }
381    }
382
383    // Find collisions (unions defined in multiple places)
384    let mut errors = Vec::new();
385    for (name, locations) in definitions {
386        if locations.len() > 1 {
387            let mut msg = format!("Union '{}' is defined multiple times:\n", name);
388            for loc in &locations {
389                msg.push_str(&format!("  - {}\n", loc));
390            }
391            msg.push_str("\nHint: Rename one of the definitions to avoid collision.");
392            errors.push(msg);
393        }
394    }
395
396    if errors.is_empty() {
397        Ok(())
398    } else {
399        Err(errors.join("\n\n"))
400    }
401}
402
/// Find the stdlib directory for filesystem fallback
///
/// Searches in order:
/// 1. SEQ_STDLIB environment variable
/// 2. `stdlib` next to the current executable, then one directory above it
/// 3. `stdlib` relative to the current directory (for development)
///
/// A SEQ_STDLIB value that is not a directory produces a warning on stderr and the search
/// continues. The variable is read as an OS string, so a path that is not valid Unicode is
/// still honoured.
///
/// Returns None if no stdlib directory is found (embedded stdlib will be used).
pub fn find_stdlib() -> Option<PathBuf> {
    // Check environment variable first. `var_os` (unlike `var`) does not reject values
    // that are not valid Unicode, which are legal paths on most platforms.
    if let Some(raw) = std::env::var_os("SEQ_STDLIB") {
        let path = PathBuf::from(raw);
        if path.is_dir() {
            return Some(path);
        }
        // If SEQ_STDLIB is set but invalid, log warning but continue
        eprintln!(
            "Warning: SEQ_STDLIB is set to '{}' but that directory doesn't exist",
            path.display()
        );
    }

    // Check relative to executable: its own directory first, then one level up
    // (for development builds). `ancestors` yields the directory itself and then
    // each parent in turn.
    let exe_path = std::env::current_exe().ok();
    let beside_exe = exe_path
        .as_deref()
        .and_then(Path::parent)
        .into_iter()
        .flat_map(|exe_dir| exe_dir.ancestors().take(2))
        .map(|dir| dir.join("stdlib"))
        .find(|candidate| candidate.is_dir());
    if beside_exe.is_some() {
        return beside_exe;
    }

    // Check relative to current directory (development)
    let local_stdlib = PathBuf::from("stdlib");
    if local_stdlib.is_dir() {
        return Some(local_stdlib.canonicalize().unwrap_or(local_stdlib));
    }

    // No filesystem stdlib found - that's OK, we have embedded stdlib
    None
}
451
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a word with an empty body whose only relevant parts are its name and location.
    fn word(name: &str, source: SourceLocation) -> WordDef {
        WordDef {
            name: name.to_string(),
            effect: None,
            body: vec![],
            source: Some(source),
            allowed_lints: vec![],
        }
    }

    #[test]
    fn test_collision_detection_no_collision() {
        let words = [
            word("foo", SourceLocation::new(PathBuf::from("a.seq"), 1)),
            word("bar", SourceLocation::new(PathBuf::from("b.seq"), 1)),
        ];

        assert!(check_collisions(&words).is_ok());
    }

    #[test]
    fn test_collision_detection_with_collision() {
        let words = [
            word("foo", SourceLocation::new(PathBuf::from("a.seq"), 1)),
            word("foo", SourceLocation::new(PathBuf::from("b.seq"), 5)),
        ];

        let error = check_collisions(&words).expect_err("duplicate word should be reported");
        for needle in ["foo", "a.seq", "b.seq", "multiple times"] {
            assert!(error.contains(needle));
        }
    }

    #[test]
    fn test_collision_detection_same_file_different_lines() {
        // The same name defined twice in one file, on different lines, still counts
        // as a collision (the parser would typically catch this earlier)
        let words = [
            word("foo", SourceLocation::new(PathBuf::from("a.seq"), 1)),
            word("foo", SourceLocation::new(PathBuf::from("a.seq"), 5)),
        ];

        assert!(check_collisions(&words).is_err());
    }

    // Integration tests for embedded stdlib

    #[test]
    fn test_embedded_stdlib_imath_available() {
        assert!(stdlib_embed::has_stdlib("imath"));
    }

    #[test]
    fn test_embedded_stdlib_resolution() {
        let resolver = Resolver::new(None);
        let resolved = resolver
            .resolve_include(&Include::Std("imath".to_string()), Path::new("."))
            .expect("embedded imath should resolve");

        match resolved {
            ResolvedInclude::FilePath(_) => panic!("Expected embedded, got file path"),
            ResolvedInclude::Embedded(name, content) => {
                assert_eq!(name, "imath");
                assert!(content.contains("abs"));
            }
        }
    }

    #[test]
    fn test_nonexistent_stdlib_module() {
        let resolver = Resolver::new(None);
        let error = resolver
            .resolve_include(&Include::Std("nonexistent".to_string()), Path::new("."))
            .expect_err("unknown module should not resolve");

        assert!(error.contains("not found"));
    }

    #[test]
    fn test_resolver_with_no_stdlib_path() {
        // A resolver without a stdlib directory relies on embedded modules only
        let resolver = Resolver::new(None);
        assert!(resolver.stdlib_path.is_none());
    }

    #[test]
    fn test_double_include_prevention_embedded() {
        let mut resolver = Resolver::new(None);
        let imath_source = stdlib_embed::get_stdlib("imath").unwrap();

        // The first include contributes the module's words
        let first = resolver
            .process_embedded_include("imath", imath_source, Path::new("."))
            .expect("first include should succeed");
        assert!(!first.words.is_empty());

        // Including the same module again contributes nothing (already included)
        let second = resolver
            .process_embedded_include("imath", imath_source, Path::new("."))
            .expect("second include should succeed");
        assert!(second.words.is_empty());
        assert!(second.unions.is_empty());
    }

    #[test]
    fn test_cross_directory_include_allowed() {
        // ".." paths must work for cross-directory includes
        use std::fs;
        use tempfile::tempdir;

        let temp = tempdir().unwrap();
        let root = temp.path();

        // Layout:
        // root/
        //   src/
        //     lib/
        //       helper.seq
        //   tests/
        //     test_main.seq (wants to include ../src/lib/helper)
        let src_lib = root.join("src").join("lib");
        let tests = root.join("tests");
        fs::create_dir_all(&src_lib).unwrap();
        fs::create_dir_all(&tests).unwrap();
        fs::write(src_lib.join("helper.seq"), ": helper ( -- Int ) 42 ;\n").unwrap();

        // Resolve from the tests directory: include ../src/lib/helper
        let resolver = Resolver::new(None);
        let include = Include::Relative("../src/lib/helper".to_string());
        let result = resolver.resolve_include(&include, &tests);

        assert!(
            result.is_ok(),
            "Cross-directory include should succeed: {:?}",
            result.err()
        );

        match result.unwrap() {
            ResolvedInclude::Embedded(_, _) => panic!("Expected file path, got embedded"),
            ResolvedInclude::FilePath(path) => assert!(path.ends_with("helper.seq")),
        }
    }

    #[test]
    fn test_dotdot_within_same_directory_structure() {
        // "../../file" must resolve correctly
        use std::fs;
        use tempfile::tempdir;

        let temp = tempdir().unwrap();
        let project = temp.path();

        // Layout: project/a/b/c/ and project/a/target.seq
        let a_dir = project.join("a");
        let deep = a_dir.join("b").join("c");
        fs::create_dir_all(&deep).unwrap();
        fs::write(a_dir.join("target.seq"), ": target ( -- ) ;\n").unwrap();

        // From a/b/c, include ../../target should work
        let resolver = Resolver::new(None);
        let include = Include::Relative("../../target".to_string());
        let result = resolver.resolve_include(&include, &deep);

        assert!(
            result.is_ok(),
            "Include with .. should work: {:?}",
            result.err()
        );
    }

    #[test]
    fn test_empty_include_path_rejected() {
        let resolver = Resolver::new(None);
        let include = Include::Relative(String::new());
        let result = resolver.resolve_include(&include, Path::new("."));

        assert!(result.is_err(), "Empty include path should be rejected");
        assert!(
            result.unwrap_err().contains("cannot be empty"),
            "Error should mention empty path"
        );
    }
}