Skip to main content

harn_parser/
analysis.rs

1use std::collections::{HashMap, HashSet};
2use std::hash::{Hash, Hasher};
3use std::path::Path;
4
5use harn_lexer::{Lexer, LexerError, Token};
6
7use crate::InlayHintInfo;
8use crate::{Parser, ParserError, SNode, TypeChecker, TypeDiagnostic};
9
10/// Stable source identity used by the incremental analysis cache.
11#[derive(Debug, Clone, PartialEq, Eq, Hash)]
12pub struct SourceId(String);
13
14impl SourceId {
15    pub fn new(value: impl Into<String>) -> Self {
16        Self(value.into())
17    }
18
19    pub fn path(path: &Path) -> Self {
20        Self(path.to_string_lossy().into_owned())
21    }
22
23    pub fn as_str(&self) -> &str {
24        &self.0
25    }
26}
27
28/// Monotonic caller-owned version for a source input.
29#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
30pub struct SourceVersion(pub u64);
31
32/// Deterministic content digest for a source input.
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
34pub struct SourceDigest(u64);
35
36impl SourceDigest {
37    pub fn from_source(source: &str) -> Self {
38        let mut hash = 0xcbf29ce484222325u64;
39        for byte in source.as_bytes() {
40            hash ^= u64::from(*byte);
41            hash = hash.wrapping_mul(0x100000001b3);
42        }
43        Self(hash)
44    }
45}
46
47#[derive(Debug, Clone, Copy, PartialEq, Eq)]
48pub enum SourceUpdate {
49    Inserted,
50    Changed,
51    Unchanged,
52}
53
54#[derive(Debug, Clone, Default, PartialEq, Eq)]
55pub struct AnalysisStats {
56    pub lex_runs: usize,
57    pub parse_runs: usize,
58    pub typecheck_runs: usize,
59}
60
61#[derive(Debug, Clone)]
62pub struct ParseOutput {
63    pub source: String,
64    pub program: Vec<SNode>,
65}
66
67#[derive(Debug, Clone)]
68pub struct TypeCheckOutput {
69    pub source: String,
70    pub program: Vec<SNode>,
71    pub diagnostics: Vec<TypeDiagnostic>,
72    pub inlay_hints: Vec<InlayHintInfo>,
73}
74
75#[derive(Debug, Clone)]
76pub enum AnalysisError {
77    MissingSource(SourceId),
78    Lex {
79        source: String,
80        error: LexerError,
81    },
82    Parse {
83        source: String,
84        errors: Vec<ParserError>,
85    },
86}
87
88impl AnalysisError {
89    pub fn source(&self) -> Option<&str> {
90        match self {
91            AnalysisError::MissingSource(_) => None,
92            AnalysisError::Lex { source, .. } | AnalysisError::Parse { source, .. } => Some(source),
93        }
94    }
95}
96
97#[derive(Debug, Clone, Default)]
98pub struct TypeCheckConfig {
99    pub strict_types: bool,
100    pub imported_names: Option<HashSet<String>>,
101    pub imported_type_decls: Vec<SNode>,
102    pub imported_callable_decls: Vec<SNode>,
103}
104
105impl TypeCheckConfig {
106    pub fn new() -> Self {
107        Self::default()
108    }
109
110    pub fn with_strict_types(mut self, strict_types: bool) -> Self {
111        self.strict_types = strict_types;
112        self
113    }
114
115    pub fn with_imported_names(mut self, imported_names: Option<HashSet<String>>) -> Self {
116        self.imported_names = imported_names;
117        self
118    }
119
120    pub fn with_imported_type_decls(mut self, imported_type_decls: Vec<SNode>) -> Self {
121        self.imported_type_decls = imported_type_decls;
122        self
123    }
124
125    pub fn with_imported_callable_decls(mut self, imported_callable_decls: Vec<SNode>) -> Self {
126        self.imported_callable_decls = imported_callable_decls;
127        self
128    }
129
130    fn cache_key(&self) -> TypeCheckCacheKey {
131        let mut imported_names = self
132            .imported_names
133            .as_ref()
134            .map(|names| names.iter().cloned().collect::<Vec<_>>());
135        if let Some(names) = &mut imported_names {
136            names.sort();
137        }
138        TypeCheckCacheKey {
139            strict_types: self.strict_types,
140            imported_names,
141            imported_type_decls_digest: debug_digest(&self.imported_type_decls),
142            imported_callable_decls_digest: debug_digest(&self.imported_callable_decls),
143        }
144    }
145
146    fn build_checker(&self) -> TypeChecker {
147        let mut checker = TypeChecker::with_strict_types(self.strict_types);
148        if let Some(imported) = self.imported_names.clone() {
149            checker = checker.with_imported_names(imported);
150        }
151        if !self.imported_type_decls.is_empty() {
152            checker = checker.with_imported_type_decls(self.imported_type_decls.clone());
153        }
154        if !self.imported_callable_decls.is_empty() {
155            checker = checker.with_imported_callable_decls(self.imported_callable_decls.clone());
156        }
157        checker
158    }
159}
160
161#[derive(Debug, Clone, PartialEq, Eq, Hash)]
162struct TypeCheckCacheKey {
163    strict_types: bool,
164    imported_names: Option<Vec<String>>,
165    imported_type_decls_digest: u64,
166    imported_callable_decls_digest: u64,
167}
168
169#[derive(Debug, Clone)]
170struct CachedTypeCheck {
171    diagnostics: Vec<TypeDiagnostic>,
172    inlay_hints: Vec<InlayHintInfo>,
173}
174
175#[derive(Debug, Clone)]
176struct SourceEntry {
177    source: String,
178    version: SourceVersion,
179    digest: SourceDigest,
180    tokens: Option<Result<Vec<Token>, LexerError>>,
181    program: Option<Result<Vec<SNode>, Vec<ParserError>>>,
182    typechecks: HashMap<TypeCheckCacheKey, CachedTypeCheck>,
183}
184
185impl SourceEntry {
186    fn new(source: String, version: SourceVersion, digest: SourceDigest) -> Self {
187        Self {
188            source,
189            version,
190            digest,
191            tokens: None,
192            program: None,
193            typechecks: HashMap::new(),
194        }
195    }
196
197    fn replace_source(&mut self, source: String, version: SourceVersion, digest: SourceDigest) {
198        self.source = source;
199        self.version = version;
200        self.digest = digest;
201        self.tokens = None;
202        self.program = None;
203        self.typechecks.clear();
204    }
205}
206
207/// Persistent query cache for lexing, parsing, and type checking Harn sources.
208#[derive(Debug, Default)]
209pub struct AnalysisDatabase {
210    entries: HashMap<SourceId, SourceEntry>,
211    stats: AnalysisStats,
212}
213
214impl AnalysisDatabase {
215    pub fn new() -> Self {
216        Self::default()
217    }
218
219    pub fn stats(&self) -> AnalysisStats {
220        self.stats.clone()
221    }
222
223    pub fn set_source(
224        &mut self,
225        id: SourceId,
226        source: String,
227        version: SourceVersion,
228    ) -> SourceUpdate {
229        let digest = SourceDigest::from_source(&source);
230        match self.entries.get_mut(&id) {
231            None => {
232                self.entries
233                    .insert(id, SourceEntry::new(source, version, digest));
234                SourceUpdate::Inserted
235            }
236            Some(entry) if entry.digest == digest => {
237                entry.version = version;
238                SourceUpdate::Unchanged
239            }
240            Some(entry) => {
241                entry.replace_source(source, version, digest);
242                SourceUpdate::Changed
243            }
244        }
245    }
246
247    pub fn parse(&mut self, id: &SourceId) -> Result<ParseOutput, AnalysisError> {
248        let mut lexed = false;
249        let mut parsed_now = false;
250        let entry = self.entry_mut(id)?;
251        if entry.tokens.is_none() {
252            lexed = true;
253            let mut lexer = Lexer::new(&entry.source);
254            entry.tokens = Some(lexer.tokenize());
255        }
256        let tokens = match entry.tokens.as_ref().expect("tokens initialized") {
257            Ok(tokens) => tokens.clone(),
258            Err(error) => {
259                let source = entry.source.clone();
260                let error = error.clone();
261                if lexed {
262                    self.stats.lex_runs += 1;
263                }
264                return Err(AnalysisError::Lex { source, error });
265            }
266        };
267
268        if entry.program.is_none() {
269            parsed_now = true;
270            let mut parser = Parser::new(tokens);
271            entry.program = Some(match parser.parse() {
272                Ok(program) => Ok(program),
273                Err(error) => {
274                    let mut errors = parser.all_errors().to_vec();
275                    if errors.is_empty() {
276                        errors.push(error);
277                    }
278                    Err(errors)
279                }
280            });
281        }
282
283        let result = match entry.program.as_ref().expect("program initialized") {
284            Ok(program) => Ok(ParseOutput {
285                source: entry.source.clone(),
286                program: program.clone(),
287            }),
288            Err(errors) => Err(AnalysisError::Parse {
289                source: entry.source.clone(),
290                errors: errors.clone(),
291            }),
292        };
293        if lexed {
294            self.stats.lex_runs += 1;
295        }
296        if parsed_now {
297            self.stats.parse_runs += 1;
298        }
299        result
300    }
301
302    pub fn typecheck(
303        &mut self,
304        id: &SourceId,
305        config: TypeCheckConfig,
306    ) -> Result<TypeCheckOutput, AnalysisError> {
307        let parsed = self.parse(id)?;
308        let key = config.cache_key();
309        if let Some(cached) = self
310            .entries
311            .get(id)
312            .expect("parse verified source entry")
313            .typechecks
314            .get(&key)
315        {
316            return Ok(TypeCheckOutput {
317                source: parsed.source,
318                program: parsed.program,
319                diagnostics: cached.diagnostics.clone(),
320                inlay_hints: cached.inlay_hints.clone(),
321            });
322        }
323
324        self.stats.typecheck_runs += 1;
325        let (diagnostics, inlay_hints) = config
326            .build_checker()
327            .check_with_hints(&parsed.program, &parsed.source);
328        let cached = CachedTypeCheck {
329            diagnostics: diagnostics.clone(),
330            inlay_hints: inlay_hints.clone(),
331        };
332        self.entries
333            .get_mut(id)
334            .expect("parse verified source entry")
335            .typechecks
336            .insert(key, cached);
337        Ok(TypeCheckOutput {
338            source: parsed.source,
339            program: parsed.program,
340            diagnostics,
341            inlay_hints,
342        })
343    }
344
345    fn entry_mut(&mut self, id: &SourceId) -> Result<&mut SourceEntry, AnalysisError> {
346        self.entries
347            .get_mut(id)
348            .ok_or_else(|| AnalysisError::MissingSource(id.clone()))
349    }
350}
351
352fn debug_digest<T: std::fmt::Debug>(value: &T) -> u64 {
353    let mut hasher = StableHasher::default();
354    format!("{value:?}").hash(&mut hasher);
355    hasher.finish()
356}
357
358#[derive(Default)]
359struct StableHasher(u64);
360
361impl Hasher for StableHasher {
362    fn finish(&self) -> u64 {
363        self.0
364    }
365
366    fn write(&mut self, bytes: &[u8]) {
367        let mut hash = if self.0 == 0 {
368            0xcbf29ce484222325u64
369        } else {
370            self.0
371        };
372        for byte in bytes {
373            hash ^= u64::from(*byte);
374            hash = hash.wrapping_mul(0x100000001b3);
375        }
376        self.0 = hash;
377    }
378}
379
380#[cfg(test)]
381mod tests {
382    use super::*;
383    use crate::DiagnosticSeverity;
384
385    fn source_id() -> SourceId {
386        SourceId::new("test.harn")
387    }
388
389    #[test]
390    fn parse_reuses_cached_program_for_unchanged_source() {
391        let mut db = AnalysisDatabase::new();
392        let id = source_id();
393        assert_eq!(
394            db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(1)),
395            SourceUpdate::Inserted
396        );
397        db.parse(&id).expect("initial parse");
398        db.parse(&id).expect("cached parse");
399        assert_eq!(db.stats().lex_runs, 1);
400        assert_eq!(db.stats().parse_runs, 1);
401
402        assert_eq!(
403            db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(2)),
404            SourceUpdate::Unchanged
405        );
406        db.parse(&id).expect("same digest parse");
407        assert_eq!(db.stats().lex_runs, 1);
408        assert_eq!(db.stats().parse_runs, 1);
409    }
410
411    #[test]
412    fn source_change_invalidates_parse_and_typecheck_outputs() {
413        let mut db = AnalysisDatabase::new();
414        let id = source_id();
415        db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(1));
416        db.typecheck(&id, TypeCheckConfig::new())
417            .expect("initial check");
418        assert_eq!(
419            db.set_source(id.clone(), "let x = 2\n".to_string(), SourceVersion(2)),
420            SourceUpdate::Changed
421        );
422        db.typecheck(&id, TypeCheckConfig::new())
423            .expect("changed check");
424        assert_eq!(db.stats().lex_runs, 2);
425        assert_eq!(db.stats().parse_runs, 2);
426        assert_eq!(db.stats().typecheck_runs, 2);
427    }
428
429    #[test]
430    fn typecheck_cache_is_keyed_by_options() {
431        let mut db = AnalysisDatabase::new();
432        let id = source_id();
433        db.set_source(
434            id.clone(),
435            "pipeline main() {\n  let x = read_file(\"a\")\n  log(x.foo)\n}\n".to_string(),
436            SourceVersion(1),
437        );
438        db.typecheck(&id, TypeCheckConfig::new())
439            .expect("default check");
440        db.typecheck(&id, TypeCheckConfig::new())
441            .expect("cached default check");
442        db.typecheck(&id, TypeCheckConfig::new().with_strict_types(true))
443            .expect("strict check");
444        assert_eq!(db.stats().typecheck_runs, 2);
445    }
446
447    #[test]
448    fn typecheck_diagnostics_are_cached_with_hints() {
449        let mut db = AnalysisDatabase::new();
450        let id = source_id();
451        db.set_source(
452            id.clone(),
453            "pipeline main() {\n  let x: int = \"nope\"\n}\n".to_string(),
454            SourceVersion(1),
455        );
456        let first = db.typecheck(&id, TypeCheckConfig::new()).expect("check");
457        let second = db.typecheck(&id, TypeCheckConfig::new()).expect("cached");
458        assert!(first
459            .diagnostics
460            .iter()
461            .any(|diag| diag.severity == DiagnosticSeverity::Error));
462        assert_eq!(first.diagnostics.len(), second.diagnostics.len());
463        assert_eq!(db.stats().typecheck_runs, 1);
464    }
465}