seqc/
resolver.rs

//! Include resolver for Seq
//!
//! Resolves include statements, loads and parses the included files,
//! and merges everything into a single `Program`.
//!
//! Supports:
//! - `include std:name` - loads `name.seq` from the stdlib directory
//! - `include "path"` - loads `path.seq` relative to the including file's directory
9
10use crate::ast::{Include, Program, SourceLocation, WordDef};
11use crate::parser::Parser;
12use std::collections::{HashMap, HashSet};
13use std::path::{Path, PathBuf};
14
/// Resolver for include statements.
///
/// Remembers which files have already been merged so that each file is
/// processed at most once, and where `include std:name` modules are looked up.
#[derive(Debug)]
pub struct Resolver {
    /// Set of files already included (canonical paths to prevent double-include)
    included: HashSet<PathBuf>,
    /// Path to stdlib directory
    stdlib_path: PathBuf,
}
22
23impl Resolver {
24    /// Create a new resolver with the given stdlib path
25    pub fn new(stdlib_path: PathBuf) -> Self {
26        Resolver {
27            included: HashSet::new(),
28            stdlib_path,
29        }
30    }
31
32    /// Resolve all includes in a program and return a merged program
33    ///
34    /// Takes the source file path and its already-parsed program.
35    /// Recursively resolves includes and merges all word definitions.
36    pub fn resolve(&mut self, source_path: &Path, program: Program) -> Result<Program, String> {
37        let source_path = source_path
38            .canonicalize()
39            .map_err(|e| format!("Failed to canonicalize {}: {}", source_path.display(), e))?;
40
41        // Mark this file as included
42        self.included.insert(source_path.clone());
43
44        // Add source location to all words in main program
45        let source_dir = source_path.parent().unwrap_or(Path::new("."));
46        let mut all_words = Vec::new();
47
48        for mut word in program.words {
49            // Set source location if not already set
50            if word.source.is_none() {
51                word.source = Some(SourceLocation {
52                    file: source_path.clone(),
53                    line: 0, // TODO: Track actual line numbers
54                });
55            }
56            all_words.push(word);
57        }
58
59        // Process includes
60        for include in &program.includes {
61            let included_path = self.resolve_include_path(include, source_dir)?;
62
63            // Skip if already included (prevents diamond dependency issues)
64            let canonical = included_path.canonicalize().map_err(|e| {
65                format!("Failed to canonicalize {}: {}", included_path.display(), e)
66            })?;
67
68            if self.included.contains(&canonical) {
69                continue;
70            }
71
72            // Read and parse the included file
73            let content = std::fs::read_to_string(&included_path)
74                .map_err(|e| format!("Failed to read {}: {}", included_path.display(), e))?;
75
76            let mut parser = Parser::new(&content);
77            let included_program = parser.parse()?;
78
79            // Recursively resolve includes in the included file
80            let resolved = self.resolve(&included_path, included_program)?;
81
82            // Add all words from the resolved program
83            all_words.extend(resolved.words);
84        }
85
86        Ok(Program {
87            includes: Vec::new(), // Includes are resolved, no longer needed
88            words: all_words,
89        })
90    }
91
92    /// Resolve an include to a file path
93    fn resolve_include_path(
94        &self,
95        include: &Include,
96        source_dir: &Path,
97    ) -> Result<PathBuf, String> {
98        match include {
99            Include::Std(name) => {
100                let path = self.stdlib_path.join(format!("{}.seq", name));
101                if !path.exists() {
102                    return Err(format!(
103                        "Standard library module '{}' not found at {}",
104                        name,
105                        path.display()
106                    ));
107                }
108                Ok(path)
109            }
110            Include::Relative(rel_path) => {
111                // Security: Early rejection of obviously malicious paths
112                if rel_path.contains("..") {
113                    return Err(format!(
114                        "Include path '{}' is invalid: paths cannot contain '..'",
115                        rel_path
116                    ));
117                }
118
119                // Cross-platform absolute path detection
120                let rel_as_path = std::path::Path::new(rel_path);
121                if rel_as_path.is_absolute() {
122                    return Err(format!(
123                        "Include path '{}' is invalid: paths cannot be absolute",
124                        rel_path
125                    ));
126                }
127
128                let path = source_dir.join(format!("{}.seq", rel_path));
129                if !path.exists() {
130                    return Err(format!(
131                        "Include file '{}' not found at {}",
132                        rel_path,
133                        path.display()
134                    ));
135                }
136
137                // Security: Verify resolved path is within source directory
138                // This catches any bypass attempts (symlinks, encoded paths, etc.)
139                let canonical_path = path
140                    .canonicalize()
141                    .map_err(|e| format!("Failed to resolve include path '{}': {}", rel_path, e))?;
142                let canonical_source = source_dir
143                    .canonicalize()
144                    .map_err(|e| format!("Failed to resolve source directory: {}", e))?;
145
146                if !canonical_path.starts_with(&canonical_source) {
147                    return Err(format!(
148                        "Include path '{}' resolves outside the source directory",
149                        rel_path
150                    ));
151                }
152
153                Ok(canonical_path)
154            }
155        }
156    }
157}
158
159/// Check for word name collisions across all definitions
160///
161/// Returns an error with helpful message if any word is defined multiple times.
162pub fn check_collisions(words: &[WordDef]) -> Result<(), String> {
163    let mut definitions: HashMap<&str, Vec<&SourceLocation>> = HashMap::new();
164
165    for word in words {
166        if let Some(ref source) = word.source {
167            definitions.entry(&word.name).or_default().push(source);
168        }
169    }
170
171    // Find collisions (words defined in multiple places)
172    let mut errors = Vec::new();
173    for (name, locations) in definitions {
174        if locations.len() > 1 {
175            let mut msg = format!("Word '{}' is defined multiple times:\n", name);
176            for loc in &locations {
177                msg.push_str(&format!("  - {}\n", loc));
178            }
179            msg.push_str("\nHint: Rename one of the definitions to avoid collision.");
180            errors.push(msg);
181        }
182    }
183
184    if errors.is_empty() {
185        Ok(())
186    } else {
187        Err(errors.join("\n\n"))
188    }
189}
190
/// Find the stdlib directory.
///
/// Searches in order:
/// 1. `SEQ_STDLIB` environment variable
/// 2. Relative to the current executable (for installed compilers):
///    `stdlib/` next to the executable, then one directory level up
/// 3. Relative to current directory (for development)
///
/// # Errors
///
/// Returns a message if `SEQ_STDLIB` is set but does not name a directory
/// (the other locations are not tried in that case), or if no location
/// contains a `stdlib` directory.
pub fn find_stdlib() -> Result<PathBuf, String> {
    // Check environment variable first. `var_os` rather than `var`: a path is
    // not required to be valid Unicode, and `var` would report such a value as
    // an error that silently falls through to the other search locations.
    if let Some(raw) = std::env::var_os("SEQ_STDLIB") {
        let path = PathBuf::from(raw);
        if path.is_dir() {
            return Ok(path);
        }
        return Err(format!(
            "SEQ_STDLIB is set to '{}' but that directory doesn't exist",
            path.display()
        ));
    }

    // Check relative to executable
    if let Ok(exe_path) = std::env::current_exe() {
        // `ancestors()` yields the executable itself first; skipping it leaves
        // the executable's directory and then that directory's parent (the
        // latter is where development builds keep stdlib).
        let beside_exe = exe_path
            .ancestors()
            .skip(1)
            .take(2)
            .map(|dir| dir.join("stdlib"))
            .find(|candidate| candidate.is_dir());
        if let Some(stdlib_path) = beside_exe {
            return Ok(stdlib_path);
        }
    }

    // Check relative to current directory (development)
    let local_stdlib = PathBuf::from("stdlib");
    if local_stdlib.is_dir() {
        // Prefer the absolute form; fall back to the relative path if it
        // cannot be canonicalized.
        return Ok(local_stdlib.canonicalize().unwrap_or(local_stdlib));
    }

    Err(
        "Could not find stdlib directory. Set SEQ_STDLIB environment variable \
         or ensure stdlib/ exists in the project root."
            .to_string(),
    )
}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a word with no effect and an empty body whose definition is
    /// recorded at `source`.
    fn word_at(name: &str, source: SourceLocation) -> WordDef {
        WordDef {
            name: name.to_string(),
            effect: None,
            body: vec![],
            source: Some(source),
        }
    }

    #[test]
    fn test_collision_detection_no_collision() {
        // Two different names from two different files never collide.
        let foo = word_at(
            "foo",
            SourceLocation {
                file: PathBuf::from("a.seq"),
                line: 1,
            },
        );
        let bar = word_at(
            "bar",
            SourceLocation {
                file: PathBuf::from("b.seq"),
                line: 1,
            },
        );

        let words = vec![foo, bar];
        assert!(check_collisions(&words).is_ok());
    }

    #[test]
    fn test_collision_detection_with_collision() {
        // The same name defined in two files must be reported, and the report
        // must mention the name and both files.
        let first = word_at(
            "foo",
            SourceLocation {
                file: PathBuf::from("a.seq"),
                line: 1,
            },
        );
        let second = word_at(
            "foo",
            SourceLocation {
                file: PathBuf::from("b.seq"),
                line: 5,
            },
        );

        let words = vec![first, second];
        let error = check_collisions(&words).expect_err("duplicate word must be rejected");
        for needle in ["foo", "a.seq", "b.seq", "multiple times"] {
            assert!(error.contains(needle));
        }
    }

    #[test]
    fn test_collision_detection_same_file_different_lines() {
        // Same word defined twice in same file on different lines.
        // This is still a collision (parser would typically catch this earlier).
        let first = word_at(
            "foo",
            SourceLocation {
                file: PathBuf::from("a.seq"),
                line: 1,
            },
        );
        let second = word_at(
            "foo",
            SourceLocation {
                file: PathBuf::from("a.seq"),
                line: 5,
            },
        );

        // This IS a collision - same name defined twice
        let words = vec![first, second];
        assert!(check_collisions(&words).is_err());
    }
}