Skip to main content

harn_parser/
analysis.rs

1use std::collections::{HashMap, HashSet};
2use std::hash::{Hash, Hasher};
3use std::path::Path;
4
5use harn_lexer::{Lexer, LexerError, Token};
6
7use crate::InlayHintInfo;
8use crate::{Parser, ParserError, SNode, TypeChecker, TypeDiagnostic};
9
10/// Stable source identity used by the incremental analysis cache.
11#[derive(Debug, Clone, PartialEq, Eq, Hash)]
12pub struct SourceId(String);
13
14impl SourceId {
15    pub fn new(value: impl Into<String>) -> Self {
16        Self(value.into())
17    }
18
19    pub fn path(path: &Path) -> Self {
20        Self(path.to_string_lossy().into_owned())
21    }
22
23    pub fn as_str(&self) -> &str {
24        &self.0
25    }
26}
27
28/// Monotonic caller-owned version for a source input.
29#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
30pub struct SourceVersion(pub u64);
31
32/// Deterministic content digest for a source input.
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
34pub struct SourceDigest(u64);
35
36impl SourceDigest {
37    pub fn from_source(source: &str) -> Self {
38        let mut hash = 0xcbf29ce484222325u64;
39        for byte in source.as_bytes() {
40            hash ^= u64::from(*byte);
41            hash = hash.wrapping_mul(0x100000001b3);
42        }
43        Self(hash)
44    }
45}
46
47#[derive(Debug, Clone, Copy, PartialEq, Eq)]
48pub enum SourceUpdate {
49    Inserted,
50    Changed,
51    Unchanged,
52}
53
54#[derive(Debug, Clone, Default, PartialEq, Eq)]
55pub struct AnalysisStats {
56    pub lex_runs: usize,
57    pub parse_runs: usize,
58    pub typecheck_runs: usize,
59}
60
61#[derive(Debug, Clone)]
62pub struct ParseOutput {
63    pub source: String,
64    pub program: Vec<SNode>,
65}
66
67#[derive(Debug, Clone)]
68pub struct TypeCheckOutput {
69    pub source: String,
70    pub program: Vec<SNode>,
71    pub diagnostics: Vec<TypeDiagnostic>,
72    pub inlay_hints: Vec<InlayHintInfo>,
73}
74
75#[derive(Debug, Clone)]
76pub enum AnalysisError {
77    MissingSource(SourceId),
78    Lex {
79        source: String,
80        error: LexerError,
81    },
82    Parse {
83        source: String,
84        errors: Vec<ParserError>,
85    },
86}
87
88impl AnalysisError {
89    pub fn source(&self) -> Option<&str> {
90        match self {
91            AnalysisError::MissingSource(_) => None,
92            AnalysisError::Lex { source, .. } | AnalysisError::Parse { source, .. } => Some(source),
93        }
94    }
95}
96
97#[derive(Debug, Clone, Default)]
98pub struct TypeCheckConfig {
99    pub strict_types: bool,
100    pub imported_names: Option<HashSet<String>>,
101    pub imported_type_decls: Vec<SNode>,
102    pub imported_callable_decls: Vec<SNode>,
103}
104
105impl TypeCheckConfig {
106    pub fn new() -> Self {
107        Self::default()
108    }
109
110    pub fn with_strict_types(mut self, strict_types: bool) -> Self {
111        self.strict_types = strict_types;
112        self
113    }
114
115    pub fn with_imported_names(mut self, imported_names: Option<HashSet<String>>) -> Self {
116        self.imported_names = imported_names;
117        self
118    }
119
120    pub fn with_imported_type_decls(mut self, imported_type_decls: Vec<SNode>) -> Self {
121        self.imported_type_decls = imported_type_decls;
122        self
123    }
124
125    pub fn with_imported_callable_decls(mut self, imported_callable_decls: Vec<SNode>) -> Self {
126        self.imported_callable_decls = imported_callable_decls;
127        self
128    }
129
130    fn cache_key(&self) -> TypeCheckCacheKey {
131        let mut imported_names = self
132            .imported_names
133            .as_ref()
134            .map(|names| names.iter().cloned().collect::<Vec<_>>());
135        if let Some(names) = &mut imported_names {
136            names.sort();
137        }
138        TypeCheckCacheKey {
139            strict_types: self.strict_types,
140            imported_names,
141            imported_type_decls_digest: debug_digest(&self.imported_type_decls),
142            imported_callable_decls_digest: debug_digest(&self.imported_callable_decls),
143        }
144    }
145
146    fn build_checker(&self) -> TypeChecker {
147        let mut checker = TypeChecker::with_strict_types(self.strict_types);
148        if let Some(imported) = self.imported_names.clone() {
149            checker = checker.with_imported_names(imported);
150        }
151        if !self.imported_type_decls.is_empty() {
152            checker = checker.with_imported_type_decls(self.imported_type_decls.clone());
153        }
154        if !self.imported_callable_decls.is_empty() {
155            checker = checker.with_imported_callable_decls(self.imported_callable_decls.clone());
156        }
157        checker
158    }
159}
160
161#[derive(Debug, Clone, PartialEq, Eq, Hash)]
162struct TypeCheckCacheKey {
163    strict_types: bool,
164    imported_names: Option<Vec<String>>,
165    imported_type_decls_digest: u64,
166    imported_callable_decls_digest: u64,
167}
168
169#[derive(Debug, Clone)]
170struct CachedTypeCheck {
171    diagnostics: Vec<TypeDiagnostic>,
172    inlay_hints: Vec<InlayHintInfo>,
173}
174
175#[derive(Debug, Clone)]
176struct SourceEntry {
177    source: String,
178    version: SourceVersion,
179    digest: SourceDigest,
180    tokens: Option<Result<Vec<Token>, LexerError>>,
181    program: Option<Result<Vec<SNode>, Vec<ParserError>>>,
182    typechecks: HashMap<TypeCheckCacheKey, CachedTypeCheck>,
183}
184
185impl SourceEntry {
186    fn new(source: String, version: SourceVersion, digest: SourceDigest) -> Self {
187        Self {
188            source,
189            version,
190            digest,
191            tokens: None,
192            program: None,
193            typechecks: HashMap::new(),
194        }
195    }
196
197    fn replace_source(&mut self, source: String, version: SourceVersion, digest: SourceDigest) {
198        self.source = source;
199        self.version = version;
200        self.digest = digest;
201        self.tokens = None;
202        self.program = None;
203        self.typechecks.clear();
204    }
205}
206
207/// Persistent query cache for lexing, parsing, and type checking Harn sources.
208#[derive(Debug, Default)]
209pub struct AnalysisDatabase {
210    entries: HashMap<SourceId, SourceEntry>,
211    stats: AnalysisStats,
212}
213
214impl AnalysisDatabase {
215    pub fn new() -> Self {
216        Self::default()
217    }
218
219    pub fn stats(&self) -> AnalysisStats {
220        self.stats.clone()
221    }
222
223    pub fn set_source(
224        &mut self,
225        id: SourceId,
226        source: String,
227        version: SourceVersion,
228    ) -> SourceUpdate {
229        let digest = SourceDigest::from_source(&source);
230        match self.entries.get_mut(&id) {
231            None => {
232                self.entries
233                    .insert(id, SourceEntry::new(source, version, digest));
234                SourceUpdate::Inserted
235            }
236            Some(entry) if entry.digest == digest => {
237                entry.version = version;
238                SourceUpdate::Unchanged
239            }
240            Some(entry) => {
241                entry.replace_source(source, version, digest);
242                SourceUpdate::Changed
243            }
244        }
245    }
246
247    pub fn set_parsed_source(
248        &mut self,
249        id: SourceId,
250        source: String,
251        version: SourceVersion,
252        program: Vec<SNode>,
253    ) -> SourceUpdate {
254        let digest = SourceDigest::from_source(&source);
255        match self.entries.get_mut(&id) {
256            None => {
257                let mut entry = SourceEntry::new(source, version, digest);
258                entry.program = Some(Ok(program));
259                self.entries.insert(id, entry);
260                SourceUpdate::Inserted
261            }
262            Some(entry) if entry.digest == digest => {
263                entry.version = version;
264                entry.program = Some(Ok(program));
265                SourceUpdate::Unchanged
266            }
267            Some(entry) => {
268                entry.replace_source(source, version, digest);
269                entry.program = Some(Ok(program));
270                SourceUpdate::Changed
271            }
272        }
273    }
274
275    pub fn parse(&mut self, id: &SourceId) -> Result<ParseOutput, AnalysisError> {
276        if let Some(entry) = self.entries.get(id) {
277            if let Some(program) = &entry.program {
278                return match program {
279                    Ok(program) => Ok(ParseOutput {
280                        source: entry.source.clone(),
281                        program: program.clone(),
282                    }),
283                    Err(errors) => Err(AnalysisError::Parse {
284                        source: entry.source.clone(),
285                        errors: errors.clone(),
286                    }),
287                };
288            }
289        }
290
291        let mut lexed = false;
292        let mut parsed_now = false;
293        let entry = self.entry_mut(id)?;
294        if entry.tokens.is_none() {
295            lexed = true;
296            let mut lexer = Lexer::new(&entry.source);
297            entry.tokens = Some(lexer.tokenize());
298        }
299        let tokens = match entry.tokens.as_ref().expect("tokens initialized") {
300            Ok(tokens) => tokens.clone(),
301            Err(error) => {
302                let source = entry.source.clone();
303                let error = error.clone();
304                if lexed {
305                    self.stats.lex_runs += 1;
306                }
307                return Err(AnalysisError::Lex { source, error });
308            }
309        };
310
311        if entry.program.is_none() {
312            parsed_now = true;
313            let mut parser = Parser::new(tokens);
314            entry.program = Some(match parser.parse() {
315                Ok(program) => Ok(program),
316                Err(error) => {
317                    let mut errors = parser.all_errors().to_vec();
318                    if errors.is_empty() {
319                        errors.push(error);
320                    }
321                    Err(errors)
322                }
323            });
324        }
325
326        let result = match entry.program.as_ref().expect("program initialized") {
327            Ok(program) => Ok(ParseOutput {
328                source: entry.source.clone(),
329                program: program.clone(),
330            }),
331            Err(errors) => Err(AnalysisError::Parse {
332                source: entry.source.clone(),
333                errors: errors.clone(),
334            }),
335        };
336        if lexed {
337            self.stats.lex_runs += 1;
338        }
339        if parsed_now {
340            self.stats.parse_runs += 1;
341        }
342        result
343    }
344
345    pub fn typecheck(
346        &mut self,
347        id: &SourceId,
348        config: TypeCheckConfig,
349    ) -> Result<TypeCheckOutput, AnalysisError> {
350        let parsed = self.parse(id)?;
351        let key = config.cache_key();
352        if let Some(cached) = self
353            .entries
354            .get(id)
355            .expect("parse verified source entry")
356            .typechecks
357            .get(&key)
358        {
359            return Ok(TypeCheckOutput {
360                source: parsed.source,
361                program: parsed.program,
362                diagnostics: cached.diagnostics.clone(),
363                inlay_hints: cached.inlay_hints.clone(),
364            });
365        }
366
367        self.stats.typecheck_runs += 1;
368        let (diagnostics, inlay_hints) = config
369            .build_checker()
370            .check_with_hints(&parsed.program, &parsed.source);
371        let cached = CachedTypeCheck {
372            diagnostics: diagnostics.clone(),
373            inlay_hints: inlay_hints.clone(),
374        };
375        self.entries
376            .get_mut(id)
377            .expect("parse verified source entry")
378            .typechecks
379            .insert(key, cached);
380        Ok(TypeCheckOutput {
381            source: parsed.source,
382            program: parsed.program,
383            diagnostics,
384            inlay_hints,
385        })
386    }
387
388    fn entry_mut(&mut self, id: &SourceId) -> Result<&mut SourceEntry, AnalysisError> {
389        self.entries
390            .get_mut(id)
391            .ok_or_else(|| AnalysisError::MissingSource(id.clone()))
392    }
393}
394
395fn debug_digest<T: std::fmt::Debug>(value: &T) -> u64 {
396    let mut hasher = StableHasher::default();
397    format!("{value:?}").hash(&mut hasher);
398    hasher.finish()
399}
400
401#[derive(Default)]
402struct StableHasher(u64);
403
404impl Hasher for StableHasher {
405    fn finish(&self) -> u64 {
406        self.0
407    }
408
409    fn write(&mut self, bytes: &[u8]) {
410        let mut hash = if self.0 == 0 {
411            0xcbf29ce484222325u64
412        } else {
413            self.0
414        };
415        for byte in bytes {
416            hash ^= u64::from(*byte);
417            hash = hash.wrapping_mul(0x100000001b3);
418        }
419        self.0 = hash;
420    }
421}
422
423#[cfg(test)]
424mod tests {
425    use super::*;
426    use crate::DiagnosticSeverity;
427
428    fn source_id() -> SourceId {
429        SourceId::new("test.harn")
430    }
431
432    #[test]
433    fn parse_reuses_cached_program_for_unchanged_source() {
434        let mut db = AnalysisDatabase::new();
435        let id = source_id();
436        assert_eq!(
437            db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(1)),
438            SourceUpdate::Inserted
439        );
440        db.parse(&id).expect("initial parse");
441        db.parse(&id).expect("cached parse");
442        assert_eq!(db.stats().lex_runs, 1);
443        assert_eq!(db.stats().parse_runs, 1);
444
445        assert_eq!(
446            db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(2)),
447            SourceUpdate::Unchanged
448        );
449        db.parse(&id).expect("same digest parse");
450        assert_eq!(db.stats().lex_runs, 1);
451        assert_eq!(db.stats().parse_runs, 1);
452    }
453
454    #[test]
455    fn source_change_invalidates_parse_and_typecheck_outputs() {
456        let mut db = AnalysisDatabase::new();
457        let id = source_id();
458        db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(1));
459        db.typecheck(&id, TypeCheckConfig::new())
460            .expect("initial check");
461        assert_eq!(
462            db.set_source(id.clone(), "let x = 2\n".to_string(), SourceVersion(2)),
463            SourceUpdate::Changed
464        );
465        db.typecheck(&id, TypeCheckConfig::new())
466            .expect("changed check");
467        assert_eq!(db.stats().lex_runs, 2);
468        assert_eq!(db.stats().parse_runs, 2);
469        assert_eq!(db.stats().typecheck_runs, 2);
470    }
471
472    #[test]
473    fn parsed_source_seed_skips_lex_and_parse() {
474        let mut db = AnalysisDatabase::new();
475        let id = source_id();
476        let source = "let x = 1\n".to_string();
477        let mut lexer = Lexer::new(&source);
478        let tokens = lexer.tokenize().expect("tokenize");
479        let mut parser = Parser::new(tokens);
480        let program = parser.parse().expect("parse");
481
482        assert_eq!(
483            db.set_parsed_source(id.clone(), source, SourceVersion(1), program),
484            SourceUpdate::Inserted
485        );
486        db.typecheck(&id, TypeCheckConfig::new())
487            .expect("seeded check");
488        assert_eq!(db.stats().lex_runs, 0);
489        assert_eq!(db.stats().parse_runs, 0);
490        assert_eq!(db.stats().typecheck_runs, 1);
491    }
492
493    #[test]
494    fn typecheck_cache_is_keyed_by_options() {
495        let mut db = AnalysisDatabase::new();
496        let id = source_id();
497        db.set_source(
498            id.clone(),
499            "pipeline main() {\n  let x = read_file(\"a\")\n  log(x.foo)\n}\n".to_string(),
500            SourceVersion(1),
501        );
502        db.typecheck(&id, TypeCheckConfig::new())
503            .expect("default check");
504        db.typecheck(&id, TypeCheckConfig::new())
505            .expect("cached default check");
506        db.typecheck(&id, TypeCheckConfig::new().with_strict_types(true))
507            .expect("strict check");
508        assert_eq!(db.stats().typecheck_runs, 2);
509    }
510
511    #[test]
512    fn typecheck_diagnostics_are_cached_with_hints() {
513        let mut db = AnalysisDatabase::new();
514        let id = source_id();
515        db.set_source(
516            id.clone(),
517            "pipeline main() {\n  let x: int = \"nope\"\n}\n".to_string(),
518            SourceVersion(1),
519        );
520        let first = db.typecheck(&id, TypeCheckConfig::new()).expect("check");
521        let second = db.typecheck(&id, TypeCheckConfig::new()).expect("cached");
522        assert!(first
523            .diagnostics
524            .iter()
525            .any(|diag| diag.severity == DiagnosticSeverity::Error));
526        assert_eq!(first.diagnostics.len(), second.diagnostics.len());
527        assert_eq!(db.stats().typecheck_runs, 1);
528    }
529}