1use std::collections::{HashMap, HashSet};
2use std::hash::{Hash, Hasher};
3use std::path::Path;
4
5use harn_lexer::{Lexer, LexerError, Token};
6
7use crate::InlayHintInfo;
8use crate::{Parser, ParserError, SNode, TypeChecker, TypeDiagnostic};
9
10#[derive(Debug, Clone, PartialEq, Eq, Hash)]
12pub struct SourceId(String);
13
14impl SourceId {
15 pub fn new(value: impl Into<String>) -> Self {
16 Self(value.into())
17 }
18
19 pub fn path(path: &Path) -> Self {
20 Self(path.to_string_lossy().into_owned())
21 }
22
23 pub fn as_str(&self) -> &str {
24 &self.0
25 }
26}
27
28#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
30pub struct SourceVersion(pub u64);
31
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
34pub struct SourceDigest(u64);
35
36impl SourceDigest {
37 pub fn from_source(source: &str) -> Self {
38 let mut hash = 0xcbf29ce484222325u64;
39 for byte in source.as_bytes() {
40 hash ^= u64::from(*byte);
41 hash = hash.wrapping_mul(0x100000001b3);
42 }
43 Self(hash)
44 }
45}
46
47#[derive(Debug, Clone, Copy, PartialEq, Eq)]
48pub enum SourceUpdate {
49 Inserted,
50 Changed,
51 Unchanged,
52}
53
54#[derive(Debug, Clone, Default, PartialEq, Eq)]
55pub struct AnalysisStats {
56 pub lex_runs: usize,
57 pub parse_runs: usize,
58 pub typecheck_runs: usize,
59}
60
61#[derive(Debug, Clone)]
62pub struct ParseOutput {
63 pub source: String,
64 pub program: Vec<SNode>,
65}
66
67#[derive(Debug, Clone)]
68pub struct TypeCheckOutput {
69 pub source: String,
70 pub program: Vec<SNode>,
71 pub diagnostics: Vec<TypeDiagnostic>,
72 pub inlay_hints: Vec<InlayHintInfo>,
73}
74
75#[derive(Debug, Clone)]
76pub enum AnalysisError {
77 MissingSource(SourceId),
78 Lex {
79 source: String,
80 error: LexerError,
81 },
82 Parse {
83 source: String,
84 errors: Vec<ParserError>,
85 },
86}
87
88impl AnalysisError {
89 pub fn source(&self) -> Option<&str> {
90 match self {
91 AnalysisError::MissingSource(_) => None,
92 AnalysisError::Lex { source, .. } | AnalysisError::Parse { source, .. } => Some(source),
93 }
94 }
95}
96
97#[derive(Debug, Clone, Default)]
98pub struct TypeCheckConfig {
99 pub strict_types: bool,
100 pub imported_names: Option<HashSet<String>>,
101 pub imported_type_decls: Vec<SNode>,
102 pub imported_callable_decls: Vec<SNode>,
103}
104
105impl TypeCheckConfig {
106 pub fn new() -> Self {
107 Self::default()
108 }
109
110 pub fn with_strict_types(mut self, strict_types: bool) -> Self {
111 self.strict_types = strict_types;
112 self
113 }
114
115 pub fn with_imported_names(mut self, imported_names: Option<HashSet<String>>) -> Self {
116 self.imported_names = imported_names;
117 self
118 }
119
120 pub fn with_imported_type_decls(mut self, imported_type_decls: Vec<SNode>) -> Self {
121 self.imported_type_decls = imported_type_decls;
122 self
123 }
124
125 pub fn with_imported_callable_decls(mut self, imported_callable_decls: Vec<SNode>) -> Self {
126 self.imported_callable_decls = imported_callable_decls;
127 self
128 }
129
130 fn cache_key(&self) -> TypeCheckCacheKey {
131 let mut imported_names = self
132 .imported_names
133 .as_ref()
134 .map(|names| names.iter().cloned().collect::<Vec<_>>());
135 if let Some(names) = &mut imported_names {
136 names.sort();
137 }
138 TypeCheckCacheKey {
139 strict_types: self.strict_types,
140 imported_names,
141 imported_type_decls_digest: debug_digest(&self.imported_type_decls),
142 imported_callable_decls_digest: debug_digest(&self.imported_callable_decls),
143 }
144 }
145
146 fn build_checker(&self) -> TypeChecker {
147 let mut checker = TypeChecker::with_strict_types(self.strict_types);
148 if let Some(imported) = self.imported_names.clone() {
149 checker = checker.with_imported_names(imported);
150 }
151 if !self.imported_type_decls.is_empty() {
152 checker = checker.with_imported_type_decls(self.imported_type_decls.clone());
153 }
154 if !self.imported_callable_decls.is_empty() {
155 checker = checker.with_imported_callable_decls(self.imported_callable_decls.clone());
156 }
157 checker
158 }
159}
160
161#[derive(Debug, Clone, PartialEq, Eq, Hash)]
162struct TypeCheckCacheKey {
163 strict_types: bool,
164 imported_names: Option<Vec<String>>,
165 imported_type_decls_digest: u64,
166 imported_callable_decls_digest: u64,
167}
168
169#[derive(Debug, Clone)]
170struct CachedTypeCheck {
171 diagnostics: Vec<TypeDiagnostic>,
172 inlay_hints: Vec<InlayHintInfo>,
173}
174
175#[derive(Debug, Clone)]
176struct SourceEntry {
177 source: String,
178 version: SourceVersion,
179 digest: SourceDigest,
180 tokens: Option<Result<Vec<Token>, LexerError>>,
181 program: Option<Result<Vec<SNode>, Vec<ParserError>>>,
182 typechecks: HashMap<TypeCheckCacheKey, CachedTypeCheck>,
183}
184
185impl SourceEntry {
186 fn new(source: String, version: SourceVersion, digest: SourceDigest) -> Self {
187 Self {
188 source,
189 version,
190 digest,
191 tokens: None,
192 program: None,
193 typechecks: HashMap::new(),
194 }
195 }
196
197 fn replace_source(&mut self, source: String, version: SourceVersion, digest: SourceDigest) {
198 self.source = source;
199 self.version = version;
200 self.digest = digest;
201 self.tokens = None;
202 self.program = None;
203 self.typechecks.clear();
204 }
205}
206
207#[derive(Debug, Default)]
209pub struct AnalysisDatabase {
210 entries: HashMap<SourceId, SourceEntry>,
211 stats: AnalysisStats,
212}
213
214impl AnalysisDatabase {
215 pub fn new() -> Self {
216 Self::default()
217 }
218
219 pub fn stats(&self) -> AnalysisStats {
220 self.stats.clone()
221 }
222
223 pub fn set_source(
224 &mut self,
225 id: SourceId,
226 source: String,
227 version: SourceVersion,
228 ) -> SourceUpdate {
229 let digest = SourceDigest::from_source(&source);
230 match self.entries.get_mut(&id) {
231 None => {
232 self.entries
233 .insert(id, SourceEntry::new(source, version, digest));
234 SourceUpdate::Inserted
235 }
236 Some(entry) if entry.digest == digest => {
237 entry.version = version;
238 SourceUpdate::Unchanged
239 }
240 Some(entry) => {
241 entry.replace_source(source, version, digest);
242 SourceUpdate::Changed
243 }
244 }
245 }
246
247 pub fn parse(&mut self, id: &SourceId) -> Result<ParseOutput, AnalysisError> {
248 let mut lexed = false;
249 let mut parsed_now = false;
250 let entry = self.entry_mut(id)?;
251 if entry.tokens.is_none() {
252 lexed = true;
253 let mut lexer = Lexer::new(&entry.source);
254 entry.tokens = Some(lexer.tokenize());
255 }
256 let tokens = match entry.tokens.as_ref().expect("tokens initialized") {
257 Ok(tokens) => tokens.clone(),
258 Err(error) => {
259 let source = entry.source.clone();
260 let error = error.clone();
261 if lexed {
262 self.stats.lex_runs += 1;
263 }
264 return Err(AnalysisError::Lex { source, error });
265 }
266 };
267
268 if entry.program.is_none() {
269 parsed_now = true;
270 let mut parser = Parser::new(tokens);
271 entry.program = Some(match parser.parse() {
272 Ok(program) => Ok(program),
273 Err(error) => {
274 let mut errors = parser.all_errors().to_vec();
275 if errors.is_empty() {
276 errors.push(error);
277 }
278 Err(errors)
279 }
280 });
281 }
282
283 let result = match entry.program.as_ref().expect("program initialized") {
284 Ok(program) => Ok(ParseOutput {
285 source: entry.source.clone(),
286 program: program.clone(),
287 }),
288 Err(errors) => Err(AnalysisError::Parse {
289 source: entry.source.clone(),
290 errors: errors.clone(),
291 }),
292 };
293 if lexed {
294 self.stats.lex_runs += 1;
295 }
296 if parsed_now {
297 self.stats.parse_runs += 1;
298 }
299 result
300 }
301
302 pub fn typecheck(
303 &mut self,
304 id: &SourceId,
305 config: TypeCheckConfig,
306 ) -> Result<TypeCheckOutput, AnalysisError> {
307 let parsed = self.parse(id)?;
308 let key = config.cache_key();
309 if let Some(cached) = self
310 .entries
311 .get(id)
312 .expect("parse verified source entry")
313 .typechecks
314 .get(&key)
315 {
316 return Ok(TypeCheckOutput {
317 source: parsed.source,
318 program: parsed.program,
319 diagnostics: cached.diagnostics.clone(),
320 inlay_hints: cached.inlay_hints.clone(),
321 });
322 }
323
324 self.stats.typecheck_runs += 1;
325 let (diagnostics, inlay_hints) = config
326 .build_checker()
327 .check_with_hints(&parsed.program, &parsed.source);
328 let cached = CachedTypeCheck {
329 diagnostics: diagnostics.clone(),
330 inlay_hints: inlay_hints.clone(),
331 };
332 self.entries
333 .get_mut(id)
334 .expect("parse verified source entry")
335 .typechecks
336 .insert(key, cached);
337 Ok(TypeCheckOutput {
338 source: parsed.source,
339 program: parsed.program,
340 diagnostics,
341 inlay_hints,
342 })
343 }
344
345 fn entry_mut(&mut self, id: &SourceId) -> Result<&mut SourceEntry, AnalysisError> {
346 self.entries
347 .get_mut(id)
348 .ok_or_else(|| AnalysisError::MissingSource(id.clone()))
349 }
350}
351
352fn debug_digest<T: std::fmt::Debug>(value: &T) -> u64 {
353 let mut hasher = StableHasher::default();
354 format!("{value:?}").hash(&mut hasher);
355 hasher.finish()
356}
357
358#[derive(Default)]
359struct StableHasher(u64);
360
361impl Hasher for StableHasher {
362 fn finish(&self) -> u64 {
363 self.0
364 }
365
366 fn write(&mut self, bytes: &[u8]) {
367 let mut hash = if self.0 == 0 {
368 0xcbf29ce484222325u64
369 } else {
370 self.0
371 };
372 for byte in bytes {
373 hash ^= u64::from(*byte);
374 hash = hash.wrapping_mul(0x100000001b3);
375 }
376 self.0 = hash;
377 }
378}
379
380#[cfg(test)]
381mod tests {
382 use super::*;
383 use crate::DiagnosticSeverity;
384
385 fn source_id() -> SourceId {
386 SourceId::new("test.harn")
387 }
388
389 #[test]
390 fn parse_reuses_cached_program_for_unchanged_source() {
391 let mut db = AnalysisDatabase::new();
392 let id = source_id();
393 assert_eq!(
394 db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(1)),
395 SourceUpdate::Inserted
396 );
397 db.parse(&id).expect("initial parse");
398 db.parse(&id).expect("cached parse");
399 assert_eq!(db.stats().lex_runs, 1);
400 assert_eq!(db.stats().parse_runs, 1);
401
402 assert_eq!(
403 db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(2)),
404 SourceUpdate::Unchanged
405 );
406 db.parse(&id).expect("same digest parse");
407 assert_eq!(db.stats().lex_runs, 1);
408 assert_eq!(db.stats().parse_runs, 1);
409 }
410
411 #[test]
412 fn source_change_invalidates_parse_and_typecheck_outputs() {
413 let mut db = AnalysisDatabase::new();
414 let id = source_id();
415 db.set_source(id.clone(), "let x = 1\n".to_string(), SourceVersion(1));
416 db.typecheck(&id, TypeCheckConfig::new())
417 .expect("initial check");
418 assert_eq!(
419 db.set_source(id.clone(), "let x = 2\n".to_string(), SourceVersion(2)),
420 SourceUpdate::Changed
421 );
422 db.typecheck(&id, TypeCheckConfig::new())
423 .expect("changed check");
424 assert_eq!(db.stats().lex_runs, 2);
425 assert_eq!(db.stats().parse_runs, 2);
426 assert_eq!(db.stats().typecheck_runs, 2);
427 }
428
429 #[test]
430 fn typecheck_cache_is_keyed_by_options() {
431 let mut db = AnalysisDatabase::new();
432 let id = source_id();
433 db.set_source(
434 id.clone(),
435 "pipeline main() {\n let x = read_file(\"a\")\n log(x.foo)\n}\n".to_string(),
436 SourceVersion(1),
437 );
438 db.typecheck(&id, TypeCheckConfig::new())
439 .expect("default check");
440 db.typecheck(&id, TypeCheckConfig::new())
441 .expect("cached default check");
442 db.typecheck(&id, TypeCheckConfig::new().with_strict_types(true))
443 .expect("strict check");
444 assert_eq!(db.stats().typecheck_runs, 2);
445 }
446
447 #[test]
448 fn typecheck_diagnostics_are_cached_with_hints() {
449 let mut db = AnalysisDatabase::new();
450 let id = source_id();
451 db.set_source(
452 id.clone(),
453 "pipeline main() {\n let x: int = \"nope\"\n}\n".to_string(),
454 SourceVersion(1),
455 );
456 let first = db.typecheck(&id, TypeCheckConfig::new()).expect("check");
457 let second = db.typecheck(&id, TypeCheckConfig::new()).expect("cached");
458 assert!(first
459 .diagnostics
460 .iter()
461 .any(|diag| diag.severity == DiagnosticSeverity::Error));
462 assert_eq!(first.diagnostics.len(), second.diagnostics.len());
463 assert_eq!(db.stats().typecheck_runs, 1);
464 }
465}