1use std::collections::{HashMap, HashSet};
2use std::hash::{Hash, Hasher};
3use std::path::Path;
4
5use harn_lexer::{Lexer, LexerError, Token};
6
7use crate::InlayHintInfo;
8use crate::{Parser, ParserError, SNode, TypeChecker, TypeDiagnostic};
9
10#[derive(Debug, Clone, PartialEq, Eq, Hash)]
12pub struct SourceId(String);
13
14impl SourceId {
15 pub fn new(value: impl Into<String>) -> Self {
16 Self(value.into())
17 }
18
19 pub fn path(path: &Path) -> Self {
20 Self(path.to_string_lossy().into_owned())
21 }
22
23 pub fn as_str(&self) -> &str {
24 &self.0
25 }
26}
27
28#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
30pub struct SourceVersion(pub u64);
31
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
34pub struct SourceDigest(u64);
35
36impl SourceDigest {
37 pub fn from_source(source: &str) -> Self {
38 let mut hash = 0xcbf29ce484222325u64;
39 for byte in source.as_bytes() {
40 hash ^= u64::from(*byte);
41 hash = hash.wrapping_mul(0x100000001b3);
42 }
43 Self(hash)
44 }
45}
46
47#[derive(Debug, Clone, Copy, PartialEq, Eq)]
48pub enum SourceUpdate {
49 Inserted,
50 Changed,
51 Unchanged,
52}
53
54#[derive(Debug, Clone, Default, PartialEq, Eq)]
55pub struct AnalysisStats {
56 pub lex_runs: usize,
57 pub parse_runs: usize,
58 pub typecheck_runs: usize,
59}
60
61#[derive(Debug, Clone)]
62pub struct ParseOutput {
63 pub source: String,
64 pub program: Vec<SNode>,
65}
66
67#[derive(Debug, Clone)]
68pub struct TypeCheckOutput {
69 pub source: String,
70 pub program: Vec<SNode>,
71 pub diagnostics: Vec<TypeDiagnostic>,
72 pub inlay_hints: Vec<InlayHintInfo>,
73}
74
75#[derive(Debug, Clone)]
76pub enum AnalysisError {
77 MissingSource(SourceId),
78 Lex {
79 source: String,
80 error: LexerError,
81 },
82 Parse {
83 source: String,
84 errors: Vec<ParserError>,
85 },
86}
87
88impl AnalysisError {
89 pub fn source(&self) -> Option<&str> {
90 match self {
91 AnalysisError::MissingSource(_) => None,
92 AnalysisError::Lex { source, .. } | AnalysisError::Parse { source, .. } => Some(source),
93 }
94 }
95}
96
97#[derive(Debug, Clone, Default)]
98pub struct TypeCheckConfig {
99 pub strict_types: bool,
100 pub imported_names: Option<HashSet<String>>,
101 pub imported_type_decls: Vec<SNode>,
102 pub imported_callable_decls: Vec<SNode>,
103}
104
105impl TypeCheckConfig {
106 pub fn new() -> Self {
107 Self::default()
108 }
109
110 pub fn with_strict_types(mut self, strict_types: bool) -> Self {
111 self.strict_types = strict_types;
112 self
113 }
114
115 pub fn with_imported_names(mut self, imported_names: Option<HashSet<String>>) -> Self {
116 self.imported_names = imported_names;
117 self
118 }
119
120 pub fn with_imported_type_decls(mut self, imported_type_decls: Vec<SNode>) -> Self {
121 self.imported_type_decls = imported_type_decls;
122 self
123 }
124
125 pub fn with_imported_callable_decls(mut self, imported_callable_decls: Vec<SNode>) -> Self {
126 self.imported_callable_decls = imported_callable_decls;
127 self
128 }
129
130 fn cache_key(&self) -> TypeCheckCacheKey {
131 let mut imported_names = self
132 .imported_names
133 .as_ref()
134 .map(|names| names.iter().cloned().collect::<Vec<_>>());
135 if let Some(names) = &mut imported_names {
136 names.sort();
137 }
138 TypeCheckCacheKey {
139 strict_types: self.strict_types,
140 imported_names,
141 imported_type_decls_digest: debug_digest(&self.imported_type_decls),
142 imported_callable_decls_digest: debug_digest(&self.imported_callable_decls),
143 }
144 }
145
146 fn build_checker(&self) -> TypeChecker {
147 let mut checker = TypeChecker::with_strict_types(self.strict_types);
148 if let Some(imported) = self.imported_names.clone() {
149 checker = checker.with_imported_names(imported);
150 }
151 if !self.imported_type_decls.is_empty() {
152 checker = checker.with_imported_type_decls(self.imported_type_decls.clone());
153 }
154 if !self.imported_callable_decls.is_empty() {
155 checker = checker.with_imported_callable_decls(self.imported_callable_decls.clone());
156 }
157 checker
158 }
159}
160
161#[derive(Debug, Clone, PartialEq, Eq, Hash)]
162struct TypeCheckCacheKey {
163 strict_types: bool,
164 imported_names: Option<Vec<String>>,
165 imported_type_decls_digest: u64,
166 imported_callable_decls_digest: u64,
167}
168
169#[derive(Debug, Clone)]
170struct CachedTypeCheck {
171 diagnostics: Vec<TypeDiagnostic>,
172 inlay_hints: Vec<InlayHintInfo>,
173}
174
175#[derive(Debug, Clone)]
176struct SourceEntry {
177 source: String,
178 version: SourceVersion,
179 digest: SourceDigest,
180 tokens: Option<Result<Vec<Token>, LexerError>>,
181 program: Option<Result<Vec<SNode>, Vec<ParserError>>>,
182 typechecks: HashMap<TypeCheckCacheKey, CachedTypeCheck>,
183}
184
185impl SourceEntry {
186 fn new(source: String, version: SourceVersion, digest: SourceDigest) -> Self {
187 Self {
188 source,
189 version,
190 digest,
191 tokens: None,
192 program: None,
193 typechecks: HashMap::new(),
194 }
195 }
196
197 fn replace_source(&mut self, source: String, version: SourceVersion, digest: SourceDigest) {
198 self.source = source;
199 self.version = version;
200 self.digest = digest;
201 self.tokens = None;
202 self.program = None;
203 self.typechecks.clear();
204 }
205}
206
207#[derive(Debug, Default)]
209pub struct AnalysisDatabase {
210 entries: HashMap<SourceId, SourceEntry>,
211 stats: AnalysisStats,
212}
213
214impl AnalysisDatabase {
215 pub fn new() -> Self {
216 Self::default()
217 }
218
219 pub fn stats(&self) -> AnalysisStats {
220 self.stats.clone()
221 }
222
223 pub fn set_source(
224 &mut self,
225 id: SourceId,
226 source: String,
227 version: SourceVersion,
228 ) -> SourceUpdate {
229 let digest = SourceDigest::from_source(&source);
230 match self.entries.get_mut(&id) {
231 None => {
232 self.entries
233 .insert(id, SourceEntry::new(source, version, digest));
234 SourceUpdate::Inserted
235 }
236 Some(entry) if entry.digest == digest => {
237 entry.version = version;
238 SourceUpdate::Unchanged
239 }
240 Some(entry) => {
241 entry.replace_source(source, version, digest);
242 SourceUpdate::Changed
243 }
244 }
245 }
246
247 pub fn set_parsed_source(
248 &mut self,
249 id: SourceId,
250 source: String,
251 version: SourceVersion,
252 program: Vec<SNode>,
253 ) -> SourceUpdate {
254 let digest = SourceDigest::from_source(&source);
255 match self.entries.get_mut(&id) {
256 None => {
257 let mut entry = SourceEntry::new(source, version, digest);
258 entry.program = Some(Ok(program));
259 self.entries.insert(id, entry);
260 SourceUpdate::Inserted
261 }
262 Some(entry) if entry.digest == digest => {
263 entry.version = version;
264 entry.program = Some(Ok(program));
265 SourceUpdate::Unchanged
266 }
267 Some(entry) => {
268 entry.replace_source(source, version, digest);
269 entry.program = Some(Ok(program));
270 SourceUpdate::Changed
271 }
272 }
273 }
274
275 pub fn parse(&mut self, id: &SourceId) -> Result<ParseOutput, AnalysisError> {
276 if let Some(entry) = self.entries.get(id) {
277 if let Some(program) = &entry.program {
278 return match program {
279 Ok(program) => Ok(ParseOutput {
280 source: entry.source.clone(),
281 program: program.clone(),
282 }),
283 Err(errors) => Err(AnalysisError::Parse {
284 source: entry.source.clone(),
285 errors: errors.clone(),
286 }),
287 };
288 }
289 }
290
291 let mut lexed = false;
292 let mut parsed_now = false;
293 let entry = self.entry_mut(id)?;
294 if entry.tokens.is_none() {
295 lexed = true;
296 let mut lexer = Lexer::new(&entry.source);
297 entry.tokens = Some(lexer.tokenize());
298 }
299 let tokens = match entry.tokens.as_ref().expect("tokens initialized") {
300 Ok(tokens) => tokens.clone(),
301 Err(error) => {
302 let source = entry.source.clone();
303 let error = error.clone();
304 if lexed {
305 self.stats.lex_runs += 1;
306 }
307 return Err(AnalysisError::Lex { source, error });
308 }
309 };
310
311 if entry.program.is_none() {
312 parsed_now = true;
313 let mut parser = Parser::new(tokens);
314 entry.program = Some(match parser.parse() {
315 Ok(program) => Ok(program),
316 Err(error) => {
317 let mut errors = parser.all_errors().to_vec();
318 if errors.is_empty() {
319 errors.push(error);
320 }
321 Err(errors)
322 }
323 });
324 }
325
326 let result = match entry.program.as_ref().expect("program initialized") {
327 Ok(program) => Ok(ParseOutput {
328 source: entry.source.clone(),
329 program: program.clone(),
330 }),
331 Err(errors) => Err(AnalysisError::Parse {
332 source: entry.source.clone(),
333 errors: errors.clone(),
334 }),
335 };
336 if lexed {
337 self.stats.lex_runs += 1;
338 }
339 if parsed_now {
340 self.stats.parse_runs += 1;
341 }
342 result
343 }
344
345 pub fn typecheck(
346 &mut self,
347 id: &SourceId,
348 config: TypeCheckConfig,
349 ) -> Result<TypeCheckOutput, AnalysisError> {
350 let parsed = self.parse(id)?;
351 let key = config.cache_key();
352 if let Some(cached) = self
353 .entries
354 .get(id)
355 .expect("parse verified source entry")
356 .typechecks
357 .get(&key)
358 {
359 return Ok(TypeCheckOutput {
360 source: parsed.source,
361 program: parsed.program,
362 diagnostics: cached.diagnostics.clone(),
363 inlay_hints: cached.inlay_hints.clone(),
364 });
365 }
366
367 self.stats.typecheck_runs += 1;
368 let (diagnostics, inlay_hints) = config
369 .build_checker()
370 .check_with_hints(&parsed.program, &parsed.source);
371 let cached = CachedTypeCheck {
372 diagnostics: diagnostics.clone(),
373 inlay_hints: inlay_hints.clone(),
374 };
375 self.entries
376 .get_mut(id)
377 .expect("parse verified source entry")
378 .typechecks
379 .insert(key, cached);
380 Ok(TypeCheckOutput {
381 source: parsed.source,
382 program: parsed.program,
383 diagnostics,
384 inlay_hints,
385 })
386 }
387
388 fn entry_mut(&mut self, id: &SourceId) -> Result<&mut SourceEntry, AnalysisError> {
389 self.entries
390 .get_mut(id)
391 .ok_or_else(|| AnalysisError::MissingSource(id.clone()))
392 }
393}
394
395fn debug_digest<T: std::fmt::Debug>(value: &T) -> u64 {
396 let mut hasher = StableHasher::default();
397 format!("{value:?}").hash(&mut hasher);
398 hasher.finish()
399}
400
401#[derive(Default)]
402struct StableHasher(u64);
403
404impl Hasher for StableHasher {
405 fn finish(&self) -> u64 {
406 self.0
407 }
408
409 fn write(&mut self, bytes: &[u8]) {
410 let mut hash = if self.0 == 0 {
411 0xcbf29ce484222325u64
412 } else {
413 self.0
414 };
415 for byte in bytes {
416 hash ^= u64::from(*byte);
417 hash = hash.wrapping_mul(0x100000001b3);
418 }
419 self.0 = hash;
420 }
421}
422
423#[cfg(test)]
424mod tests {
425 use super::*;
426 use crate::DiagnosticSeverity;
427
428 fn source_id() -> SourceId {
429 SourceId::new("test.harn")
430 }
431
432 #[test]
433 fn parse_reuses_cached_program_for_unchanged_source() {
434 let mut db = AnalysisDatabase::new();
435 let id = source_id();
436 assert_eq!(
437 db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(1)),
438 SourceUpdate::Inserted
439 );
440 db.parse(&id).expect("initial parse");
441 db.parse(&id).expect("cached parse");
442 assert_eq!(db.stats().lex_runs, 1);
443 assert_eq!(db.stats().parse_runs, 1);
444
445 assert_eq!(
446 db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(2)),
447 SourceUpdate::Unchanged
448 );
449 db.parse(&id).expect("same digest parse");
450 assert_eq!(db.stats().lex_runs, 1);
451 assert_eq!(db.stats().parse_runs, 1);
452 }
453
454 #[test]
455 fn source_change_invalidates_parse_and_typecheck_outputs() {
456 let mut db = AnalysisDatabase::new();
457 let id = source_id();
458 db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(1));
459 db.typecheck(&id, TypeCheckConfig::new())
460 .expect("initial check");
461 assert_eq!(
462 db.set_source(id.clone(), "let x = 2\n".to_string(), SourceVersion(2)),
463 SourceUpdate::Changed
464 );
465 db.typecheck(&id, TypeCheckConfig::new())
466 .expect("changed check");
467 assert_eq!(db.stats().lex_runs, 2);
468 assert_eq!(db.stats().parse_runs, 2);
469 assert_eq!(db.stats().typecheck_runs, 2);
470 }
471
472 #[test]
473 fn parsed_source_seed_skips_lex_and_parse() {
474 let mut db = AnalysisDatabase::new();
475 let id = source_id();
476 let source = "let x = 1\n".to_string();
477 let mut lexer = Lexer::new(&source);
478 let tokens = lexer.tokenize().expect("tokenize");
479 let mut parser = Parser::new(tokens);
480 let program = parser.parse().expect("parse");
481
482 assert_eq!(
483 db.set_parsed_source(id.clone(), source, SourceVersion(1), program),
484 SourceUpdate::Inserted
485 );
486 db.typecheck(&id, TypeCheckConfig::new())
487 .expect("seeded check");
488 assert_eq!(db.stats().lex_runs, 0);
489 assert_eq!(db.stats().parse_runs, 0);
490 assert_eq!(db.stats().typecheck_runs, 1);
491 }
492
493 #[test]
494 fn typecheck_cache_is_keyed_by_options() {
495 let mut db = AnalysisDatabase::new();
496 let id = source_id();
497 db.set_source(
498 id.clone(),
499 "pipeline main() {\n let x = read_file(\"a\")\n log(x.foo)\n}\n".to_string(),
500 SourceVersion(1),
501 );
502 db.typecheck(&id, TypeCheckConfig::new())
503 .expect("default check");
504 db.typecheck(&id, TypeCheckConfig::new())
505 .expect("cached default check");
506 db.typecheck(&id, TypeCheckConfig::new().with_strict_types(true))
507 .expect("strict check");
508 assert_eq!(db.stats().typecheck_runs, 2);
509 }
510
511 #[test]
512 fn typecheck_diagnostics_are_cached_with_hints() {
513 let mut db = AnalysisDatabase::new();
514 let id = source_id();
515 db.set_source(
516 id.clone(),
517 "pipeline main() {\n let x: int = \"nope\"\n}\n".to_string(),
518 SourceVersion(1),
519 );
520 let first = db.typecheck(&id, TypeCheckConfig::new()).expect("check");
521 let second = db.typecheck(&id, TypeCheckConfig::new()).expect("cached");
522 assert!(first
523 .diagnostics
524 .iter()
525 .any(|diag| diag.severity == DiagnosticSeverity::Error));
526 assert_eq!(first.diagnostics.len(), second.diagnostics.len());
527 assert_eq!(db.stats().typecheck_runs, 1);
528 }
529}