1use crate::{Gcx, hir::SourceId, ty::GcxMut};
2use rayon::prelude::*;
3use solar_ast::{self as ast, Span};
4use solar_data_structures::{
5 index::{Idx, IndexVec, index_vec},
6 map::{FxHashMap, FxHashSet},
7 sync::Mutex,
8};
9use solar_interface::{
10 Result, Session,
11 config::CompilerStage,
12 diagnostics::{DiagCtxt, ErrorGuaranteed},
13 source_map::{FileName, FileResolver, ResolveError, SourceFile},
14};
15use solar_parse::{Lexer, Parser, unescape};
16use std::{fmt, path::Path, sync::Arc};
17use thread_local::ThreadLocal;
18
/// Context for loading, resolving, and parsing source files.
///
/// Borrows the sources and AST arenas out of the global context; dropping it
/// without calling [`ParsingContext::parse`] emits a warning (see the `Drop` impl).
#[must_use = "`ParsingContext::parse` must be called to parse the sources"]
pub struct ParsingContext<'gcx> {
    /// The compiler session.
    pub sess: &'gcx Session,
    /// Resolves paths and import strings to source files; configured from the session.
    pub file_resolver: FileResolver<'gcx>,
    /// The loaded sources, borrowed mutably from the global context.
    pub(crate) sources: &'gcx mut Sources<'gcx>,
    /// Per-thread arenas that parsed ASTs are allocated into.
    pub(crate) arenas: &'gcx ThreadLocal<ast::Arena>,
    /// Whether import statements are recursively resolved and parsed.
    resolve_imports: bool,
    /// Set once parsing has run; checked in `Drop` to warn about unused contexts.
    parsed: bool,
    /// Handle to the global context, used to advance the compiler stage.
    gcx: Gcx<'gcx>,
}
50
impl<'gcx> ParsingContext<'gcx> {
    /// Creates a new parsing context from the mutable global context.
    ///
    /// Borrows `sources` and the AST arenas out of the global context, and configures
    /// the file resolver from the session's options.
    pub(crate) fn new(mut gcx_: GcxMut<'gcx>) -> Self {
        let gcx = gcx_.get_mut();
        let sess = gcx.sess;
        let mut file_resolver = FileResolver::new(sess.source_map());
        file_resolver.configure_from_sess(sess);
        Self {
            sess,
            file_resolver,
            sources: &mut gcx.sources,
            arenas: &gcx.ast_arenas,
            // Import resolution is on by default; disabled by the unstable
            // `no_resolve_imports` option.
            resolve_imports: !sess.opts.unstable.no_resolve_imports,
            parsed: false,
            gcx: gcx_.get(),
        }
    }

    /// Returns the diagnostics context of the session.
    #[inline]
    pub fn dcx(&self) -> &'gcx DiagCtxt {
        &self.sess.dcx
    }

    /// Sets whether import statements are recursively resolved and parsed.
    pub fn set_resolve_imports(&mut self, resolve_imports: bool) {
        self.resolve_imports = resolve_imports;
    }

    /// Resolves a single file path to a source file, emitting a diagnostic on failure.
    pub fn resolve_file(&self, path: impl AsRef<Path>) -> Result<Arc<SourceFile>> {
        self.file_resolver.resolve_file(path.as_ref(), None).map_err(self.map_resolve_error())
    }

    /// Resolves multiple file paths sequentially. See [`Self::resolve_file`].
    pub fn resolve_files(
        &self,
        paths: impl IntoIterator<Item = impl AsRef<Path>>,
    ) -> impl Iterator<Item = Result<Arc<SourceFile>>> {
        paths.into_iter().map(|path| self.resolve_file(path))
    }

    /// Resolves multiple file paths in parallel. See [`Self::resolve_file`].
    pub fn par_resolve_files(
        &self,
        paths: impl IntoParallelIterator<Item = impl AsRef<Path>>,
    ) -> impl ParallelIterator<Item = Result<Arc<SourceFile>>> {
        paths.into_par_iter().map(|path| self.resolve_file(path))
    }

    /// Loads `stdin` into the context as a source to be parsed.
    #[instrument(level = "debug", skip_all)]
    pub fn load_stdin(&mut self) -> Result<()> {
        let file = self.file_resolver.load_stdin().map_err(self.map_resolve_error())?;
        self.add_file(file);
        Ok(())
    }

    /// Loads files at the given paths into the context, sequentially.
    #[instrument(level = "debug", skip_all)]
    pub fn load_files(&mut self, paths: impl IntoIterator<Item = impl AsRef<Path>>) -> Result<()> {
        for path in paths {
            self.load_file(path.as_ref())?;
        }
        Ok(())
    }

    /// Loads files at the given paths into the context, resolving them in parallel.
    pub fn par_load_files(
        &mut self,
        paths: impl IntoParallelIterator<Item = impl AsRef<Path>>,
    ) -> Result<()> {
        // Resolve in parallel first, then insert sequentially, since insertion
        // requires `&mut self`.
        let resolved = self.par_resolve_files(paths).collect::<Result<Vec<_>>>()?;
        self.add_files(resolved);
        Ok(())
    }

    /// Loads the file at the given path into the context.
    #[instrument(level = "debug", skip_all)]
    pub fn load_file(&mut self, path: &Path) -> Result<()> {
        let file = self.resolve_file(path)?;
        self.add_file(file);
        Ok(())
    }

    /// Adds multiple preloaded files to the context.
    pub fn add_files(&mut self, files: impl IntoIterator<Item = Arc<SourceFile>>) {
        for file in files {
            self.add_file(file);
        }
    }

    /// Adds a preloaded file to the context. Already-present files are ignored.
    pub fn add_file(&mut self, file: Arc<SourceFile>) {
        self.sources.get_or_insert_file(file);
    }

    /// Re-resolves the imports of all currently loaded sources, then parses any
    /// sources that were newly discovered in the process.
    ///
    /// Import edges are deduplicated (`check_dup = true`) because a source's imports
    /// may already have been recorded by an earlier parse.
    pub fn force_resolve_all_imports(mut self) {
        // Take the sources out so `self` (and its file resolver) can be used while
        // they are mutated.
        let mut sources = std::mem::take(self.sources);
        let mut any_new = false;
        for id in sources.indices() {
            let source = &mut sources[id];
            // Take the AST out (and clone the file `Arc`) so the borrow of `sources`
            // is released before `add_import` mutates it inside the loop.
            let ast = source.ast.take();
            for (import_item_id, import_file) in
                self.resolve_imports(&source.file.clone(), ast.as_ref())
            {
                let (_import_id, is_new) =
                    sources.add_import(id, import_item_id, import_file, true);
                if is_new {
                    any_new = true;
                }
            }
            sources[id].ast = ast;
        }
        *self.sources = sources;

        self.parsed = true;
        if any_new {
            self.parse_inner();
        }
    }

    /// Parses all the loaded sources, recursively resolving and parsing imports
    /// unless disabled. Consumes the context.
    pub fn parse(mut self) {
        self.parse_inner();
    }

    #[instrument(name = "parse", level = "debug", skip_all)]
    fn parse_inner(&mut self) {
        self.parsed = true;
        let _ = self.gcx.advance_stage(CompilerStage::Parsing);

        let mut sources = std::mem::take(self.sources);
        if !sources.is_empty() {
            // Pre/post counts are only gathered for the debug log below.
            let dbg = enabled!(tracing::Level::DEBUG);
            let len_before = sources.len();
            let sources_parsed_before = if dbg { sources.count_parsed() } else { 0 };

            // Stay sequential when the session demands it, or when there is a single
            // file and import resolution is off (no extra work to parallelize).
            if self.sess.is_sequential() || (sources.len() == 1 && !self.resolve_imports) {
                self.parse_sequential(&mut sources, self.arenas.get_or_default());
            } else {
                self.parse_parallel(&mut sources, self.arenas);
            }

            if dbg {
                let len_after = sources.len();
                let sources_added =
                    len_after.checked_sub(len_before).expect("parsing removed sources?");

                let sources_parsed_after = sources.count_parsed();
                let solidity_sources_parsed = sources_parsed_after
                    .checked_sub(sources_parsed_before)
                    .expect("parsing removed parsed sources?");

                if sources_added > 0 || solidity_sources_parsed > 0 {
                    debug!(
                        sources_added,
                        solidity_sources_parsed,
                        num_sources = len_after,
                        num_contracts = sources.iter().map(|s| s.count_contracts()).sum::<usize>(),
                        total_bytes = %crate::fmt_bytes(sources.iter().map(|s| s.file.src.len()).sum::<usize>()),
                        total_lines = sources.iter().map(|s| s.file.count_lines()).sum::<usize>(),
                        "parsed",
                    );
                }
            }
        }

        sources.assert_unique();
        *self.sources = sources;
    }

    /// Parses all unparsed sources on the current thread, appending newly resolved
    /// imports to `sources` as it goes.
    fn parse_sequential<'ast>(&self, sources: &mut Sources<'ast>, arena: &'ast ast::Arena) {
        // Manual index loop on purpose: `sources` grows while iterating as imports
        // are discovered and appended.
        for i in 0.. {
            let id = SourceId::from_usize(i);
            let Some(source) = sources.get(id) else { break };
            if source.ast.is_some() {
                continue;
            }

            let ast = self.parse_one(&source.file, arena);
            let _guard = debug_span!("resolve_imports").entered();
            // Clone the file `Arc` so the borrow of `sources` can be released before
            // `add_import` mutates it.
            for (import_item_id, import_file) in
                self.resolve_imports(&source.file.clone(), ast.as_ref())
            {
                sources.add_import(id, import_item_id, import_file, false);
            }
            sources[id].ast = ast;
        }
    }

    /// Parses all unparsed sources in parallel: one job per source, where each job
    /// spawns further jobs for newly discovered imports.
    fn parse_parallel<'ast>(
        &self,
        sources: &mut Sources<'ast>,
        arenas: &'ast ThreadLocal<ast::Arena>,
    ) {
        // Jobs share the source list through a mutex. `rayon::scope` waits for all
        // (transitively) spawned jobs before returning, after which the lock has no
        // other holders and can be unwrapped.
        let lock = Mutex::new(std::mem::take(sources));
        rayon::scope(|scope| {
            let sources = &*lock.lock();
            for (id, source) in sources.iter_enumerated() {
                if source.ast.is_some() {
                    continue;
                }
                let file = source.file.clone();
                self.spawn_parse_job(&lock, id, file, arenas, scope);
            }
        });
        *sources = lock.into_inner();
    }

    /// Spawns [`Self::parse_job`] on the given rayon scope.
    fn spawn_parse_job<'ast, 'scope>(
        &'scope self,
        lock: &'scope Mutex<Sources<'ast>>,
        id: SourceId,
        file: Arc<SourceFile>,
        arenas: &'ast ThreadLocal<ast::Arena>,
        scope: &rayon::Scope<'scope>,
    ) {
        scope.spawn(move |scope| self.parse_job(lock, id, file, arenas, scope));
    }

    /// Parses a single file and resolves its imports without holding the lock, then
    /// briefly locks `sources` to record the results and spawn jobs for new imports.
    #[instrument(level = "debug", skip_all)]
    fn parse_job<'ast, 'scope>(
        &'scope self,
        lock: &'scope Mutex<Sources<'ast>>,
        id: SourceId,
        file: Arc<SourceFile>,
        arenas: &'ast ThreadLocal<ast::Arena>,
        scope: &rayon::Scope<'scope>,
    ) {
        let ast = self.parse_one(&file, arenas.get_or_default());
        let imports = {
            let _guard = debug_span!("resolve_imports").entered();
            self.resolve_imports(&file, ast.as_ref()).collect::<Vec<_>>()
        };

        let _guard = debug_span!("add_imports").entered();
        let sources = &mut *lock.lock();
        // Each source is parsed exactly once: jobs are only spawned for sources whose
        // AST is still `None`.
        assert!(sources[id].ast.is_none());
        sources[id].ast = ast;
        for (import_item_id, import_file) in imports {
            let (import_id, is_new) =
                sources.add_import(id, import_item_id, import_file.clone(), false);
            if is_new {
                self.spawn_parse_job(lock, import_id, import_file, arenas, scope);
            }
        }
    }

    /// Lexes and parses the given file into the given arena.
    ///
    /// Returns `None` if parsing failed (errors have already been emitted), or if the
    /// language is Yul, whose parsed object is currently not stored.
    #[instrument(level = "debug", skip_all, fields(file = %file.name.display()))]
    fn parse_one<'ast>(
        &self,
        file: &SourceFile,
        arena: &'ast ast::Arena,
    ) -> Option<ast::SourceUnit<'ast>> {
        let lexer = Lexer::from_source_file(self.sess, file);
        let mut parser = Parser::from_lexer(arena, lexer);
        if self.sess.opts.language.is_yul() {
            let _file = parser.parse_yul_file_object().map_err(|e| e.emit());
            None
        } else {
            parser.parse_file().map_err(|e| e.emit()).ok()
        }
    }

    /// Resolves the import items of the given AST to source files.
    ///
    /// Yields nothing when `ast` is `None` or when import resolution is disabled.
    fn resolve_imports(
        &self,
        file: &SourceFile,
        ast: Option<&ast::SourceUnit<'_>>,
    ) -> impl Iterator<Item = (ast::ItemId, Arc<SourceFile>)> {
        // Relative imports are resolved against the importing file's directory, which
        // only exists for real files.
        let parent = match &file.name {
            FileName::Real(path) => Some(path.as_path()),
            FileName::Stdin | FileName::Custom(_) => None,
        };
        let items =
            ast.filter(|_| self.resolve_imports).map(|ast| &ast.items[..]).unwrap_or_default();
        items
            .iter_enumerated()
            .filter_map(move |(id, item)| self.resolve_import(item, parent).map(|file| (id, file)))
    }

    /// Resolves a single item if it is an import directive; `None` otherwise.
    fn resolve_import(
        &self,
        item: &ast::Item<'_>,
        parent: Option<&Path>,
    ) -> Option<Arc<SourceFile>> {
        let ast::ItemKind::Import(import) = &item.kind else { return None };
        self.resolve_import_directive(import, parent)
    }

    /// Resolves an import directive's path string literal to a source file.
    ///
    /// Returns `None` (emitting a diagnostic where one hasn't been emitted already) if
    /// the literal cannot be unescaped, is not a valid path, or cannot be resolved.
    fn resolve_import_directive(
        &self,
        import: &ast::ImportDirective<'_>,
        parent: Option<&Path>,
    ) -> Option<Arc<SourceFile>> {
        let span = import.path.span;
        let path_str = import.path.value.as_str();
        let (path_bytes, any_error) =
            unescape::parse_string_literal(path_str, unescape::StrKind::Str, span, self.sess);
        if any_error {
            // Unescaping errors were emitted by `parse_string_literal`.
            return None;
        }
        // `path_from_bytes` only fails on platforms where paths must be valid UTF-8.
        let Some(path) = path_from_bytes(&path_bytes[..]) else {
            self.dcx().err("import path is not a valid UTF-8 string").span(span).emit();
            return None;
        };
        self.file_resolver
            .resolve_file(path, parent)
            .map_err(self.map_resolve_error_with(Some(span)))
            .ok()
    }

    /// Returns a closure that emits a resolution error with no span attached.
    fn map_resolve_error(&self) -> impl FnOnce(ResolveError) -> ErrorGuaranteed {
        self.map_resolve_error_with(None)
    }

    /// Returns a closure that emits a resolution error, attached to `span` if given.
    fn map_resolve_error_with(
        &self,
        span: Option<Span>,
    ) -> impl FnOnce(ResolveError) -> ErrorGuaranteed {
        move |e| {
            let mut err = self.dcx().err(e.to_string());
            if let Some(span) = span {
                err = err.span(span);
            }
            err.emit()
        }
    }
}
390
391impl Drop for ParsingContext<'_> {
392 fn drop(&mut self) {
393 if self.parsed {
394 return;
395 }
396 warn!("`ParsingContext::parse` not called");
399 }
400}
401
/// Converts raw bytes to a path. On Unix, any byte sequence is a valid `OsStr`,
/// so this never returns `None`.
#[cfg(unix)]
fn path_from_bytes(bytes: &[u8]) -> Option<&Path> {
    use std::os::unix::ffi::OsStrExt;
    let os_str = std::ffi::OsStr::from_bytes(bytes);
    Some(Path::new(os_str))
}
407
/// Converts raw bytes to a path. Off Unix, the bytes must be valid UTF-8;
/// returns `None` otherwise.
#[cfg(not(unix))]
fn path_from_bytes(bytes: &[u8]) -> Option<&Path> {
    match std::str::from_utf8(bytes) {
        Ok(s) => Some(Path::new(s)),
        Err(_) => None,
    }
}
412
/// The set of loaded sources, indexed by [`SourceId`], with a reverse map from
/// source file to id.
#[derive(Default)]
pub struct Sources<'ast> {
    /// The sources, in insertion order until [`Sources::topo_sort`] reorders them.
    sources: IndexVec<SourceId, Source<'ast>>,
    /// Reverse lookup: file -> id. Kept consistent with `sources`.
    file_to_id: FxHashMap<Arc<SourceFile>, SourceId>,
}
419
420impl fmt::Debug for Sources<'_> {
421 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
422 f.write_str("ParsedSources")?;
423 self.sources.fmt(f)
424 }
425}
426
impl<'ast> Sources<'ast> {
    /// Creates a new, empty set of sources.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the source with the given id, if any.
    #[inline]
    pub fn get(&self, id: SourceId) -> Option<&Source<'ast>> {
        self.sources.get(id)
    }

    /// Returns the source with the given id, if any.
    #[inline]
    pub fn get_mut(&mut self, id: SourceId) -> Option<&mut Source<'ast>> {
        self.sources.get_mut(id)
    }

    /// Returns the id and source for the given file, if it has been added.
    pub fn get_file(&self, file: &Arc<SourceFile>) -> Option<(SourceId, &Source<'ast>)> {
        self.file_to_id.get(file).map(|&id| {
            debug_assert_eq!(self.sources[id].file, *file, "file_to_id is inconsistent");
            (id, &self.sources[id])
        })
    }

    /// Returns the id and source for the given file, if it has been added.
    pub fn get_file_mut(
        &mut self,
        file: &Arc<SourceFile>,
    ) -> Option<(SourceId, &mut Source<'ast>)> {
        self.file_to_id.get(file).map(|&id| {
            debug_assert_eq!(self.sources[id].file, *file, "file_to_id is inconsistent");
            (id, &mut self.sources[id])
        })
    }

    /// Returns the id for the given file, inserting a new, unparsed source if it was
    /// not present. The returned `bool` is `true` iff a new source was inserted.
    #[instrument(level = "debug", skip_all)]
    pub fn get_or_insert_file(&mut self, file: Arc<SourceFile>) -> (SourceId, bool) {
        let mut new = false;
        // The closure only runs on a map miss: it pushes the new source and yields its
        // id as the map value, keeping `sources` and `file_to_id` in sync.
        let id = *self.file_to_id.entry(file).or_insert_with_key(|file| {
            new = true;
            self.sources.push(Source::new(file.clone()))
        });
        (id, new)
    }

    /// Removes the source for the given file, returning it if it was present.
    ///
    /// NOTE(review): presumably `IndexVec::remove` shifts the elements after `id`,
    /// which would stale the ids stored in `file_to_id` and in `imports` — confirm
    /// callers account for this (e.g. only ever remove the last source).
    pub fn remove_file(&mut self, file: &Arc<SourceFile>) -> Option<Source<'ast>> {
        self.file_to_id.remove(file).map(|id| self.sources.remove(id))
    }

    /// Iterates over all the parsed ASTs.
    pub fn asts(&self) -> impl DoubleEndedIterator<Item = &ast::SourceUnit<'ast>> {
        self.sources.iter().filter_map(|source| source.ast.as_ref())
    }

    /// Iterates over all the parsed ASTs in parallel.
    pub fn par_asts(&self) -> impl ParallelIterator<Item = &ast::SourceUnit<'ast>> {
        self.sources.as_raw_slice().par_iter().filter_map(|source| source.ast.as_ref())
    }

    /// Returns the number of sources that have a parsed AST.
    fn count_parsed(&self) -> usize {
        self.sources.iter().filter(|s| s.ast.is_some()).count()
    }

    /// Records that `current` imports `import` through item `import_item_id`,
    /// inserting the imported file if it is new.
    ///
    /// Returns the imported source's id and whether it was newly inserted. With
    /// `check_dup`, an already-recorded `(item, id)` edge is not pushed again.
    fn add_import(
        &mut self,
        current: SourceId,
        import_item_id: ast::ItemId,
        import: Arc<SourceFile>,
        check_dup: bool,
    ) -> (SourceId, bool) {
        let ret = self.get_or_insert_file(import);
        let (import_id, new) = ret;

        let current = &mut self.sources[current].imports;
        let value = (import_item_id, import_id);
        if check_dup && current.contains(&value) {
            // A duplicate edge implies the imported file was inserted earlier.
            assert!(!new, "duplicate import but source is new?");
            return ret;
        }
        current.push(value);

        ret
    }

    /// Debug-asserts that no source file appears twice.
    fn assert_unique(&self) {
        if self.sources.len() <= 1 {
            return;
        }

        debug_assert_eq!(
            self.sources.iter().map(|s| &*s.file).collect::<FxHashSet<_>>().len(),
            self.sources.len(),
            "parsing produced duplicate source files"
        );
    }

    /// Sorts the sources topologically, placing imported sources before the sources
    /// that import them, and remaps all stored [`SourceId`]s accordingly.
    #[instrument(level = "debug", skip_all)]
    pub fn topo_sort(&mut self) {
        let len = self.len();
        if len <= 1 {
            return;
        }

        // `order[new_pos] = old_id` and `map[old_id] = new_pos`; `SourceId::MAX`
        // marks a not-yet-assigned entry in `map`.
        let mut order = IndexVec::with_capacity(len);
        let mut map = index_vec![SourceId::MAX; len];
        let mut seen = FxHashSet::with_capacity_and_hasher(len, Default::default());
        debug_span!("topo_order").in_scope(|| {
            for id in self.sources.indices() {
                self.topo_order(id, &mut order, &mut map, &mut seen);
            }
        });
        debug_assert!(
            order.len() == len && !map.contains(&SourceId::MAX) && seen.len() == len,
            "topo_order did not visit all sources"
        );

        // Rewrite every stored id to its new position before physically moving the
        // sources themselves.
        debug_span!("remap_state").in_scope(|| {
            for source in &mut self.sources {
                for (_, import) in &mut source.imports {
                    *import = map[*import];
                }
            }

            for id in self.file_to_id.values_mut() {
                *id = map[*id];
            }
        });

        debug_span!("sort_by_indices").in_scope(|| {
            sort_by_indices(&mut self.sources, order);
        });
    }

    /// Post-order DFS: visits `id`'s imports first, then assigns `id` the next slot
    /// in `order` (so imports get lower positions). The `seen` set prevents revisits
    /// and breaks import cycles.
    fn topo_order(
        &self,
        id: SourceId,
        order: &mut IndexVec<SourceId, SourceId>,
        map: &mut IndexVec<SourceId, SourceId>,
        seen: &mut FxHashSet<SourceId>,
    ) {
        if !seen.insert(id) {
            return;
        }
        for &(_, import_id) in &self.sources[id].imports {
            self.topo_order(import_id, order, map, seen);
        }
        map[id] = order.push(id);
    }
}
587
588impl<'ast> std::ops::Deref for Sources<'ast> {
589 type Target = IndexVec<SourceId, Source<'ast>>;
590
591 #[inline]
592 fn deref(&self) -> &Self::Target {
593 &self.sources
594 }
595}
596
597impl std::ops::DerefMut for Sources<'_> {
598 #[inline]
599 fn deref_mut(&mut self) -> &mut Self::Target {
600 &mut self.sources
601 }
602}
603
/// A single source file and its parse state.
pub struct Source<'ast> {
    /// The source file.
    pub file: Arc<SourceFile>,
    /// Resolved imports: each entry pairs the import item in this source's AST with
    /// the id of the imported source.
    pub imports: Vec<(ast::ItemId, SourceId)>,
    /// The parsed AST. `None` until parsed, when parsing failed, or for Yul sources
    /// (whose parsed object is not stored — see `ParsingContext::parse_one`).
    pub ast: Option<ast::SourceUnit<'ast>>,
}
622
623impl fmt::Debug for Source<'_> {
624 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
625 f.debug_struct("Source")
626 .field("file", &self.file.name)
627 .field("imports", &self.imports)
628 .field("ast", &self.ast.as_ref().map(|ast| format!("{} items", ast.items.len())))
629 .finish()
630 }
631}
632
633impl Source<'_> {
634 pub fn new(file: Arc<SourceFile>) -> Self {
636 Self { file, ast: None, imports: Vec::new() }
637 }
638
639 fn count_contracts(&self) -> usize {
640 self.ast.as_ref().map(|ast| ast.count_contracts()).unwrap_or(0)
641 }
642}
643
/// Permutes `data` in place so that, after the call, `data[i]` holds the element that
/// was previously at `indices[i]`.
///
/// `indices` is consumed as scratch space: each entry is overwritten with its own
/// index as its cycle of the permutation is resolved, so every element is moved at
/// most once (O(n) swaps, no extra allocation).
fn sort_by_indices<I: Idx, T>(data: &mut IndexVec<I, T>, mut indices: IndexVec<I, I>) {
    assert_eq!(data.len(), indices.len());
    for idx in data.indices() {
        // Entries equal to their own index are already in place (or resolved).
        if indices[idx] != idx {
            let mut current_idx = idx;
            loop {
                let target_idx = indices[current_idx];
                // Mark this slot as resolved before moving on.
                indices[current_idx] = current_idx;
                // Stop once the cycle closes back on an already-resolved slot.
                if indices[target_idx] == target_idx {
                    break;
                }
                data.swap(current_idx, target_idx);
                current_idx = target_idx;
            }
        }
    }
}
664
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use solar_ast::ItemId;

    use super::*;

    /// Checks that `sources` and `file_to_id` stay consistent across insertion and
    /// topological sorting.
    #[test]
    fn sources_consistency() {
        let sess = Session::builder().with_test_emitter().build();
        sess.enter_sequential(|| {
            let mut sources = Sources::new();

            // Insert three distinct files; each must be reported as newly added.
            let mut files = Vec::new();
            for (name, src) in [("a.sol", "abcd"), ("b.sol", "aaaaa"), ("c.sol", "cccccc")] {
                let path = PathBuf::from(name);
                let file = sess.source_map().new_source_file(path.clone(), src).unwrap();
                let (id, is_new) = sources.get_or_insert_file(file);
                assert!(is_new);
                files.push((id, path));
            }
            let aid = files[0].0;
            let cid = files[2].0;

            // Give `a.sol` an import edge to `c.sol` so `topo_sort` has work to do.
            sources[aid].imports.push((ItemId::new(0), cid));

            for (id, path) in &files {
                assert_eq!(sources[*id].file.name, FileName::Real(path.clone()));
            }

            let assert_maps = |sources: &mut Sources<'_>| {
                for (_, path) in &files {
                    let file = sess.source_map().get_file(path).unwrap();
                    let id = sources.get_file(&file).unwrap().0;
                    assert_eq!(sources[id].file, file);
                }
            };

            // The maps must remain consistent both before and after sorting.
            assert_maps(&mut sources);
            sources.topo_sort();
            assert_maps(&mut sources);
        });
    }
}