seqc/
resolver.rs

1//! Include Resolver for Seq
2//!
3//! Resolves include statements, loads and parses included files,
4//! and merges everything into a single Program.
5//!
6//! Supports:
7//! - `include std:name` - loads from embedded stdlib (or filesystem fallback)
8//! - `include ffi:name` - loads FFI manifest (collected but not processed here)
9//! - `include "path"` - loads relative to current file
10
11use crate::ast::{Include, Program, SourceLocation, UnionDef, WordDef};
12use crate::parser::Parser;
13use crate::stdlib_embed;
14use std::collections::{HashMap, HashSet};
15use std::path::{Path, PathBuf};
16
17/// Result of resolving includes
18pub struct ResolveResult {
19    /// The resolved program with all includes merged
20    pub program: Program,
21    /// FFI library names that were included (e.g., ["readline"])
22    pub ffi_includes: Vec<String>,
23}
24
25/// Words and unions collected from a resolved include
26struct ResolvedContent {
27    words: Vec<WordDef>,
28    unions: Vec<UnionDef>,
29}
30
/// Where an include's source text comes from once it has been located
#[derive(Debug)]
enum ResolvedInclude {
    /// Module shipped inside the compiler binary: (module name, source text)
    Embedded(String, &'static str),
    /// Module that lives on disk at the given path
    FilePath(PathBuf),
}
39
/// Stateful resolver that tracks what has already been included
pub struct Resolver {
    /// Canonical paths of every file merged so far (guards against double-include)
    included_files: HashSet<PathBuf>,
    /// Names of embedded stdlib modules merged so far
    included_embedded: HashSet<String>,
    /// Optional on-disk stdlib directory, consulted for modules that are not embedded
    stdlib_path: Option<PathBuf>,
    /// FFI library names collected from includes
    ffi_includes: Vec<String>,
}
51
52impl Resolver {
53    /// Create a new resolver with an optional stdlib path for filesystem fallback
54    pub fn new(stdlib_path: Option<PathBuf>) -> Self {
55        Resolver {
56            included_files: HashSet::new(),
57            included_embedded: HashSet::new(),
58            stdlib_path,
59            ffi_includes: Vec::new(),
60        }
61    }
62
63    /// Resolve all includes in a program and return a merged program with FFI includes
64    ///
65    /// Takes the source file path and its already-parsed program.
66    /// Recursively resolves includes and merges all word and union definitions.
67    /// FFI includes are collected but not processed (they don't produce words/unions).
68    pub fn resolve(
69        &mut self,
70        source_path: &Path,
71        program: Program,
72    ) -> Result<ResolveResult, String> {
73        let source_path = source_path
74            .canonicalize()
75            .map_err(|e| format!("Failed to canonicalize {}: {}", source_path.display(), e))?;
76
77        // Mark this file as included
78        self.included_files.insert(source_path.clone());
79
80        let source_dir = source_path.parent().unwrap_or(Path::new("."));
81        let mut all_words = Vec::new();
82        let mut all_unions = Vec::new();
83
84        for mut word in program.words {
85            // Update source location with file path
86            if let Some(ref mut source) = word.source {
87                source.file = source_path.clone();
88            } else {
89                word.source = Some(SourceLocation::new(source_path.clone(), 0));
90            }
91            all_words.push(word);
92        }
93
94        for mut union_def in program.unions {
95            // Update source location with file path
96            if let Some(ref mut source) = union_def.source {
97                source.file = source_path.clone();
98            } else {
99                union_def.source = Some(SourceLocation::new(source_path.clone(), 0));
100            }
101            all_unions.push(union_def);
102        }
103
104        // Process includes
105        for include in &program.includes {
106            let content = self.process_include(include, source_dir)?;
107            all_words.extend(content.words);
108            all_unions.extend(content.unions);
109        }
110
111        let resolved_program = Program {
112            includes: Vec::new(), // Includes are resolved, no longer needed
113            unions: all_unions,
114            words: all_words,
115        };
116
117        // Note: Constructor generation is done in lib.rs after resolution
118        // to keep all constructor generation in one place
119
120        Ok(ResolveResult {
121            program: resolved_program,
122            ffi_includes: std::mem::take(&mut self.ffi_includes),
123        })
124    }
125
126    /// Process a single include and return the resolved words and unions
127    fn process_include(
128        &mut self,
129        include: &Include,
130        source_dir: &Path,
131    ) -> Result<ResolvedContent, String> {
132        // Handle FFI includes specially - they don't produce words/unions,
133        // they're collected for later processing by the FFI system
134        if let Include::Ffi(name) = include {
135            // Check if we have the FFI manifest
136            if !crate::ffi::has_ffi_manifest(name) {
137                return Err(format!(
138                    "FFI library '{}' not found. Available: {}",
139                    name,
140                    crate::ffi::list_ffi_manifests().join(", ")
141                ));
142            }
143            // Avoid duplicate FFI includes
144            if !self.ffi_includes.contains(name) {
145                self.ffi_includes.push(name.clone());
146            }
147            // FFI includes don't add words/unions directly
148            return Ok(ResolvedContent {
149                words: Vec::new(),
150                unions: Vec::new(),
151            });
152        }
153
154        let resolved = self.resolve_include(include, source_dir)?;
155
156        match resolved {
157            ResolvedInclude::Embedded(name, content) => {
158                self.process_embedded_include(&name, content, source_dir)
159            }
160            ResolvedInclude::FilePath(path) => self.process_file_include(&path),
161        }
162    }
163
164    /// Process an embedded stdlib include
165    fn process_embedded_include(
166        &mut self,
167        name: &str,
168        content: &str,
169        source_dir: &Path,
170    ) -> Result<ResolvedContent, String> {
171        // Skip if already included
172        if self.included_embedded.contains(name) {
173            return Ok(ResolvedContent {
174                words: Vec::new(),
175                unions: Vec::new(),
176            });
177        }
178        self.included_embedded.insert(name.to_string());
179
180        // Parse the embedded content
181        let mut parser = Parser::new(content);
182        let included_program = parser
183            .parse()
184            .map_err(|e| format!("Failed to parse embedded module '{}': {}", name, e))?;
185
186        // Create a pseudo-path for source locations
187        let pseudo_path = PathBuf::from(format!("<stdlib:{}>", name));
188
189        // Collect words with updated source locations
190        let mut all_words = Vec::new();
191        for mut word in included_program.words {
192            if let Some(ref mut source) = word.source {
193                source.file = pseudo_path.clone();
194            } else {
195                word.source = Some(SourceLocation::new(pseudo_path.clone(), 0));
196            }
197            all_words.push(word);
198        }
199
200        // Collect unions with updated source locations
201        let mut all_unions = Vec::new();
202        for mut union_def in included_program.unions {
203            if let Some(ref mut source) = union_def.source {
204                source.file = pseudo_path.clone();
205            } else {
206                union_def.source = Some(SourceLocation::new(pseudo_path.clone(), 0));
207            }
208            all_unions.push(union_def);
209        }
210
211        // Recursively process includes from embedded module
212        for include in &included_program.includes {
213            let content = self.process_include(include, source_dir)?;
214            all_words.extend(content.words);
215            all_unions.extend(content.unions);
216        }
217
218        Ok(ResolvedContent {
219            words: all_words,
220            unions: all_unions,
221        })
222    }
223
224    /// Process a filesystem include
225    fn process_file_include(&mut self, path: &Path) -> Result<ResolvedContent, String> {
226        // Skip if already included (prevents diamond dependency issues)
227        let canonical = path
228            .canonicalize()
229            .map_err(|e| format!("Failed to canonicalize {}: {}", path.display(), e))?;
230
231        if self.included_files.contains(&canonical) {
232            return Ok(ResolvedContent {
233                words: Vec::new(),
234                unions: Vec::new(),
235            });
236        }
237
238        // Read and parse the included file
239        let content = std::fs::read_to_string(path)
240            .map_err(|e| format!("Failed to read {}: {}", path.display(), e))?;
241
242        let mut parser = Parser::new(&content);
243        let included_program = parser.parse()?;
244
245        // Recursively resolve includes in the included file
246        let resolved = self.resolve(path, included_program)?;
247
248        Ok(ResolvedContent {
249            words: resolved.program.words,
250            unions: resolved.program.unions,
251        })
252    }
253
254    /// Resolve an include to either embedded content or a file path
255    fn resolve_include(
256        &self,
257        include: &Include,
258        source_dir: &Path,
259    ) -> Result<ResolvedInclude, String> {
260        match include {
261            Include::Std(name) => {
262                // Check embedded stdlib first
263                if let Some(content) = stdlib_embed::get_stdlib(name) {
264                    return Ok(ResolvedInclude::Embedded(name.clone(), content));
265                }
266
267                // Fall back to filesystem if stdlib_path is available
268                if let Some(ref stdlib_path) = self.stdlib_path {
269                    let path = stdlib_path.join(format!("{}.seq", name));
270                    if path.exists() {
271                        return Ok(ResolvedInclude::FilePath(path));
272                    }
273                }
274
275                // Not found anywhere
276                Err(format!(
277                    "Standard library module '{}' not found (not embedded{})",
278                    name,
279                    if self.stdlib_path.is_some() {
280                        " and not in stdlib directory"
281                    } else {
282                        ""
283                    }
284                ))
285            }
286            Include::Relative(rel_path) => Ok(ResolvedInclude::FilePath(
287                self.resolve_relative_path(rel_path, source_dir)?,
288            )),
289            Include::Ffi(_) => {
290                // FFI includes are handled separately in process_include
291                unreachable!("FFI includes should be handled before resolve_include is called")
292            }
293        }
294    }
295
296    /// Resolve a relative include path to a file path
297    ///
298    /// Paths can contain `..` to reference parent directories, but the resolved
299    /// path must stay within the project root (main source file's directory).
300    fn resolve_relative_path(&self, rel_path: &str, source_dir: &Path) -> Result<PathBuf, String> {
301        // Validate non-empty path
302        if rel_path.is_empty() {
303            return Err("Include path cannot be empty".to_string());
304        }
305
306        // Cross-platform absolute path detection
307        let rel_as_path = std::path::Path::new(rel_path);
308        if rel_as_path.is_absolute() {
309            return Err(format!(
310                "Include path '{}' is invalid: paths cannot be absolute",
311                rel_path
312            ));
313        }
314
315        let path = source_dir.join(format!("{}.seq", rel_path));
316        if !path.exists() {
317            return Err(format!(
318                "Include file '{}' not found at {}",
319                rel_path,
320                path.display()
321            ));
322        }
323
324        // Canonicalize to resolve symlinks and normalize the path
325        let canonical_path = path
326            .canonicalize()
327            .map_err(|e| format!("Failed to resolve include path '{}': {}", rel_path, e))?;
328
329        Ok(canonical_path)
330    }
331}
332
333/// Check for word name collisions across all definitions
334///
335/// Returns an error with helpful message if any word is defined multiple times.
336pub fn check_collisions(words: &[WordDef]) -> Result<(), String> {
337    let mut definitions: HashMap<&str, Vec<&SourceLocation>> = HashMap::new();
338
339    for word in words {
340        if let Some(ref source) = word.source {
341            definitions.entry(&word.name).or_default().push(source);
342        }
343    }
344
345    // Find collisions (words defined in multiple places)
346    let mut errors = Vec::new();
347    for (name, locations) in definitions {
348        if locations.len() > 1 {
349            let mut msg = format!("Word '{}' is defined multiple times:\n", name);
350            for loc in &locations {
351                msg.push_str(&format!("  - {}\n", loc));
352            }
353            msg.push_str("\nHint: Rename one of the definitions to avoid collision.");
354            errors.push(msg);
355        }
356    }
357
358    if errors.is_empty() {
359        Ok(())
360    } else {
361        Err(errors.join("\n\n"))
362    }
363}
364
365/// Check for union name collisions across all definitions
366///
367/// Returns an error with helpful message if any union is defined multiple times.
368pub fn check_union_collisions(unions: &[UnionDef]) -> Result<(), String> {
369    let mut definitions: HashMap<&str, Vec<&SourceLocation>> = HashMap::new();
370
371    for union_def in unions {
372        if let Some(ref source) = union_def.source {
373            definitions.entry(&union_def.name).or_default().push(source);
374        }
375    }
376
377    // Find collisions (unions defined in multiple places)
378    let mut errors = Vec::new();
379    for (name, locations) in definitions {
380        if locations.len() > 1 {
381            let mut msg = format!("Union '{}' is defined multiple times:\n", name);
382            for loc in &locations {
383                msg.push_str(&format!("  - {}\n", loc));
384            }
385            msg.push_str("\nHint: Rename one of the definitions to avoid collision.");
386            errors.push(msg);
387        }
388    }
389
390    if errors.is_empty() {
391        Ok(())
392    } else {
393        Err(errors.join("\n\n"))
394    }
395}
396
/// Locate an on-disk stdlib directory to use as a filesystem fallback
///
/// Candidates are tried in this order, and the first existing directory wins:
/// 1. The directory named by the SEQ_STDLIB environment variable
/// 2. `stdlib` next to the current executable, then `stdlib` one level above it
/// 3. `stdlib` in the current working directory (canonicalized when possible)
///
/// A warning is printed to stderr when SEQ_STDLIB is set but does not name a
/// directory; the search then continues with the remaining candidates.
///
/// Returns None if no candidate exists (embedded stdlib will be used).
pub fn find_stdlib() -> Option<PathBuf> {
    // 1. Explicit override through the environment
    if let Ok(raw) = std::env::var("SEQ_STDLIB") {
        let candidate = PathBuf::from(raw);
        if candidate.is_dir() {
            return Some(candidate);
        }
        // An unusable override is reported but is not fatal
        eprintln!(
            "Warning: SEQ_STDLIB is set to '{}' but that directory doesn't exist",
            candidate.display()
        );
    }

    // 2. Locations derived from where the compiler binary lives
    if let Ok(exe_path) = std::env::current_exe() {
        if let Some(exe_dir) = exe_path.parent() {
            let beside_exe = exe_dir.join("stdlib");
            if beside_exe.is_dir() {
                return Some(beside_exe);
            }
            // One directory up covers development build layouts
            if let Some(above_exe) = exe_dir.parent().map(|up| up.join("stdlib")) {
                if above_exe.is_dir() {
                    return Some(above_exe);
                }
            }
        }
    }

    // 3. Working directory (development)
    let local = PathBuf::from("stdlib");
    if !local.is_dir() {
        // Nothing on disk - that's OK, the embedded stdlib is still available
        return None;
    }
    Some(local.canonicalize().unwrap_or(local))
}
445
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a word with no effect and an empty body, defined at `source`.
    fn word_at(name: &str, source: SourceLocation) -> WordDef {
        WordDef {
            name: name.to_string(),
            effect: None,
            body: vec![],
            source: Some(source),
        }
    }

    // Collision detection

    #[test]
    fn test_collision_detection_no_collision() {
        let words = [
            word_at("foo", SourceLocation::new(PathBuf::from("a.seq"), 1)),
            word_at("bar", SourceLocation::new(PathBuf::from("b.seq"), 1)),
        ];

        assert!(check_collisions(&words).is_ok());
    }

    #[test]
    fn test_collision_detection_with_collision() {
        let words = [
            word_at("foo", SourceLocation::new(PathBuf::from("a.seq"), 1)),
            word_at("foo", SourceLocation::new(PathBuf::from("b.seq"), 5)),
        ];

        let outcome = check_collisions(&words);
        assert!(outcome.is_err());

        // The report names the word, both files, and the nature of the problem
        let message = outcome.unwrap_err();
        assert!(message.contains("foo"));
        assert!(message.contains("a.seq"));
        assert!(message.contains("b.seq"));
        assert!(message.contains("multiple times"));
    }

    #[test]
    fn test_collision_detection_same_file_different_lines() {
        // Two definitions of one name inside a single file still collide
        // (parser would typically catch this earlier)
        let words = [
            word_at("foo", SourceLocation::new(PathBuf::from("a.seq"), 1)),
            word_at("foo", SourceLocation::new(PathBuf::from("a.seq"), 5)),
        ];

        assert!(check_collisions(&words).is_err());
    }

    // Integration tests for embedded stdlib

    #[test]
    fn test_embedded_stdlib_imath_available() {
        assert!(stdlib_embed::has_stdlib("imath"));
    }

    #[test]
    fn test_embedded_stdlib_resolution() {
        let resolver = Resolver::new(None);
        let outcome =
            resolver.resolve_include(&Include::Std("imath".to_string()), Path::new("."));
        assert!(outcome.is_ok());

        if let ResolvedInclude::Embedded(name, content) = outcome.unwrap() {
            assert_eq!(name, "imath");
            assert!(content.contains("abs"));
        } else {
            panic!("Expected embedded, got file path");
        }
    }

    #[test]
    fn test_nonexistent_stdlib_module() {
        let resolver = Resolver::new(None);
        let outcome =
            resolver.resolve_include(&Include::Std("nonexistent".to_string()), Path::new("."));

        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().contains("not found"));
    }

    #[test]
    fn test_resolver_with_no_stdlib_path() {
        // Without a stdlib directory the resolver relies on embedded modules only
        assert!(Resolver::new(None).stdlib_path.is_none());
    }

    #[test]
    fn test_double_include_prevention_embedded() {
        let mut resolver = Resolver::new(None);
        let imath_source = stdlib_embed::get_stdlib("imath").unwrap();

        // The first include contributes the module's words
        let first = resolver.process_embedded_include("imath", imath_source, Path::new("."));
        assert!(first.is_ok());
        assert!(!first.unwrap().words.is_empty());

        // Including the same module again contributes nothing
        let second = resolver.process_embedded_include(
            "imath",
            stdlib_embed::get_stdlib("imath").unwrap(),
            Path::new("."),
        );
        assert!(second.is_ok());
        let repeated = second.unwrap();
        assert!(repeated.words.is_empty());
        assert!(repeated.unions.is_empty());
    }

    // Relative path resolution

    #[test]
    fn test_cross_directory_include_allowed() {
        // ".." segments may cross into a sibling directory tree
        use std::fs;
        use tempfile::tempdir;

        let scratch = tempdir().unwrap();
        let root = scratch.path();

        // Layout:
        // root/
        //   src/
        //     lib/
        //       helper.seq
        //   tests/
        //     test_main.seq (wants to include ../src/lib/helper)
        let lib_dir = root.join("src").join("lib");
        let tests_dir = root.join("tests");
        fs::create_dir_all(&lib_dir).unwrap();
        fs::create_dir_all(&tests_dir).unwrap();
        fs::write(lib_dir.join("helper.seq"), ": helper ( -- Int ) 42 ;\n").unwrap();

        // Resolve `include ../src/lib/helper` as seen from the tests directory
        let resolver = Resolver::new(None);
        let include = Include::Relative("../src/lib/helper".to_string());
        let result = resolver.resolve_include(&include, &tests_dir);

        assert!(
            result.is_ok(),
            "Cross-directory include should succeed: {:?}",
            result.err()
        );

        match result.unwrap() {
            ResolvedInclude::Embedded(_, _) => panic!("Expected file path, got embedded"),
            ResolvedInclude::FilePath(path) => {
                assert!(path.ends_with("helper.seq"));
            }
        }
    }

    #[test]
    fn test_dotdot_within_same_directory_structure() {
        // "../../file" climbs two levels before locating the target
        use std::fs;
        use tempfile::tempdir;

        let scratch = tempdir().unwrap();
        let project = scratch.path();

        // Layout: project/a/b/c/ and project/a/target.seq
        let a_dir = project.join("a");
        let deep = a_dir.join("b").join("c");
        fs::create_dir_all(&deep).unwrap();
        fs::write(a_dir.join("target.seq"), ": target ( -- ) ;\n").unwrap();

        // From a/b/c, `include ../../target` lands on a/target.seq
        let resolver = Resolver::new(None);
        let include = Include::Relative("../../target".to_string());
        let result = resolver.resolve_include(&include, &deep);

        assert!(
            result.is_ok(),
            "Include with .. should work: {:?}",
            result.err()
        );
    }

    #[test]
    fn test_empty_include_path_rejected() {
        let resolver = Resolver::new(None);
        let result = resolver.resolve_include(&Include::Relative(String::new()), Path::new("."));

        assert!(result.is_err(), "Empty include path should be rejected");
        assert!(
            result.unwrap_err().contains("cannot be empty"),
            "Error should mention empty path"
        );
    }
}