aha_wit_parser/
lib.rs

1use anyhow::{anyhow, bail, Context, Result};
2use id_arena::{Arena, Id};
3use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag};
4use std::collections::{HashMap, HashSet};
5use std::fs;
6use std::path::{Path, PathBuf};
7
8pub mod abi;
9mod ast;
10mod sizealign;
11pub use sizealign::*;
12
13/// Checks if the given string is a legal identifier in wit.
14pub fn validate_id(s: &str) -> Result<()> {
15    ast::validate_id(0, s)?;
16    Ok(())
17}
18
19#[derive(Debug, Default)]
20pub struct Interface {
21    pub name: String,
22    /// The module name to use for bindings generation.
23    ///
24    /// If `None`, then the interface name will be used.
25    ///
26    /// If `Some`, then this value is used to format an export
27    /// name of `<module>#<name>` for exports or an import module
28    /// name of `<module>` for imports.
29    pub module: Option<String>,
30    pub types: Arena<TypeDef>,
31    pub type_lookup: HashMap<String, TypeId>,
32    pub resources: Arena<Resource>,
33    pub resource_lookup: HashMap<String, ResourceId>,
34    pub interfaces: Arena<Interface>,
35    pub interface_lookup: HashMap<String, InterfaceId>,
36    pub functions: Vec<Function>,
37    pub globals: Vec<Global>,
38}
39
40pub type TypeId = Id<TypeDef>;
41pub type ResourceId = Id<Resource>;
42pub type InterfaceId = Id<Interface>;
43
44#[derive(Debug)]
45pub struct TypeDef {
46    pub docs: Docs,
47    pub kind: TypeDefKind,
48    pub name: Option<String>,
49    /// `None` if this type is originally declared in this instance or
50    /// otherwise `Some` if it was originally defined in a different module.
51    pub foreign_module: Option<String>,
52}
53
54#[derive(Debug)]
55pub enum TypeDefKind {
56    Record(Record),
57    Flags(Flags),
58    Tuple(Tuple),
59    Variant(Variant),
60    Enum(Enum),
61    Option(Type),
62    Expected(Expected),
63    Union(Union),
64    List(Type),
65    Future(Type),
66    Stream(Stream),
67    Type(Type),
68}
69
70#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
71pub enum Type {
72    Unit,
73    Bool,
74    U8,
75    U16,
76    U32,
77    U64,
78    S8,
79    S16,
80    S32,
81    S64,
82    Float32,
83    Float64,
84    Char,
85    String,
86    Handle(ResourceId),
87    Id(TypeId),
88}
89
/// Unsigned integer widths; returned by the `tag()` methods of the
/// variant-like types to name the integer used for a discriminant.
#[derive(PartialEq, Debug, Copy, Clone)]
pub enum Int {
    /// 8-bit unsigned integer.
    U8,
    /// 16-bit unsigned integer.
    U16,
    /// 32-bit unsigned integer.
    U32,
    /// 64-bit unsigned integer.
    U64,
}
97
98#[derive(Debug)]
99pub struct Record {
100    pub fields: Vec<Field>,
101}
102
103#[derive(Debug)]
104pub struct Field {
105    pub docs: Docs,
106    pub name: String,
107    pub ty: Type,
108}
109
110#[derive(Debug, Clone)]
111pub struct Flags {
112    pub flags: Vec<Flag>,
113}
114
115#[derive(Debug, Clone)]
116pub struct Flag {
117    pub docs: Docs,
118    pub name: String,
119}
120
121#[derive(Debug)]
122pub enum FlagsRepr {
123    U8,
124    U16,
125    U32(usize),
126}
127
128impl Flags {
129    pub fn repr(&self) -> FlagsRepr {
130        match self.flags.len() {
131            n if n <= 8 => FlagsRepr::U8,
132            n if n <= 16 => FlagsRepr::U16,
133            n => FlagsRepr::U32(sizealign::align_to(n, 32) / 32),
134        }
135    }
136}
137
138impl FlagsRepr {
139    pub fn count(&self) -> usize {
140        match self {
141            FlagsRepr::U8 => 1,
142            FlagsRepr::U16 => 1,
143            FlagsRepr::U32(n) => *n,
144        }
145    }
146}
147
148#[derive(Debug, Clone)]
149pub struct Tuple {
150    pub types: Vec<Type>,
151}
152
153#[derive(Debug)]
154pub struct Variant {
155    pub cases: Vec<Case>,
156}
157
158#[derive(Debug)]
159pub struct Case {
160    pub docs: Docs,
161    pub name: String,
162    pub ty: Type,
163}
164
165impl Variant {
166    pub fn tag(&self) -> Int {
167        match self.cases.len() {
168            n if n <= u8::max_value() as usize => Int::U8,
169            n if n <= u16::max_value() as usize => Int::U16,
170            n if n <= u32::max_value() as usize => Int::U32,
171            _ => panic!("too many cases to fit in a repr"),
172        }
173    }
174}
175
176#[derive(Debug)]
177pub struct Enum {
178    pub cases: Vec<EnumCase>,
179}
180
181#[derive(Debug, Clone)]
182pub struct EnumCase {
183    pub docs: Docs,
184    pub name: String,
185}
186
187impl Enum {
188    pub fn tag(&self) -> Int {
189        match self.cases.len() {
190            n if n <= u8::max_value() as usize => Int::U8,
191            n if n <= u16::max_value() as usize => Int::U16,
192            n if n <= u32::max_value() as usize => Int::U32,
193            _ => panic!("too many cases to fit in a repr"),
194        }
195    }
196}
197
198#[derive(Debug)]
199pub struct Expected {
200    pub ok: Type,
201    pub err: Type,
202}
203
204#[derive(Debug)]
205pub struct Union {
206    pub cases: Vec<UnionCase>,
207}
208
209#[derive(Debug, Clone)]
210pub struct UnionCase {
211    pub docs: Docs,
212    pub ty: Type,
213}
214
215impl Union {
216    pub fn tag(&self) -> Int {
217        match self.cases.len() {
218            n if n <= u8::max_value() as usize => Int::U8,
219            n if n <= u16::max_value() as usize => Int::U16,
220            n if n <= u32::max_value() as usize => Int::U32,
221            _ => panic!("too many cases to fit in a repr"),
222        }
223    }
224}
225
226#[derive(Debug)]
227pub struct Stream {
228    pub element: Type,
229    pub end: Type,
230}
231
/// Documentation attached to an item; defaults to having no contents.
#[derive(Clone, Default, Debug)]
pub struct Docs {
    /// The documentation text, or `None` when the item has none.
    pub contents: Option<String>,
}
236
237#[derive(Debug)]
238pub struct Resource {
239    pub docs: Docs,
240    pub name: String,
241    /// `None` if this resource is defined within the containing instance,
242    /// otherwise `Some` if it's defined in an instance named here.
243    pub foreign_module: Option<String>,
244}
245
246#[derive(Debug)]
247pub struct Global {
248    pub docs: Docs,
249    pub name: String,
250    pub ty: Type,
251}
252
253#[derive(Debug)]
254pub struct Function {
255    pub is_async: bool,
256    pub docs: Docs,
257    pub name: String,
258    pub kind: FunctionKind,
259    pub params: Vec<(String, Type)>,
260    pub result: Type,
261}
262
263#[derive(Debug)]
264pub enum FunctionKind {
265    Freestanding,
266    Static { resource: ResourceId, name: String },
267    Method { resource: ResourceId, name: String },
268}
269
270impl Function {
271    pub fn item_name(&self) -> &str {
272        match &self.kind {
273            FunctionKind::Freestanding => &self.name,
274            FunctionKind::Static { name, .. } => name,
275            FunctionKind::Method { name, .. } => name,
276        }
277    }
278}
279
280fn unwrap_md(contents: &str) -> String {
281    let mut wit = String::new();
282    let mut last_pos = 0;
283    let mut in_wit_code_block = false;
284    Parser::new_ext(contents, Options::empty())
285        .into_offset_iter()
286        .for_each(|(event, range)| match (event, range) {
287            (Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Borrowed("wit")))), _) => {
288                in_wit_code_block = true;
289            }
290            (Event::Text(text), range) if in_wit_code_block => {
291                // Ensure that offsets are correct by inserting newlines to
292                // cover the Markdown content outside of wit code blocks.
293                for _ in contents[last_pos..range.start].lines() {
294                    wit.push_str("\n");
295                }
296                wit.push_str(&text);
297                last_pos = range.end;
298            }
299            (Event::End(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Borrowed("wit")))), _) => {
300                in_wit_code_block = false;
301            }
302            _ => {}
303        });
304    wit
305}
306
307impl Interface {
308    pub fn parse(name: &str, input: &str) -> Result<Interface> {
309        Interface::parse_with(name, input, |f| {
310            Err(anyhow!("cannot load submodule `{}`", f))
311        })
312    }
313
314    pub fn parse_file(path: impl AsRef<Path>) -> Result<Interface> {
315        let path = path.as_ref();
316        let parent = path.parent().unwrap();
317        let contents = std::fs::read_to_string(&path)
318            .with_context(|| format!("failed to read: {}", path.display()))?;
319        Interface::parse_with(path, &contents, |path| load_fs(parent, path))
320    }
321
322    pub fn parse_with(
323        filename: impl AsRef<Path>,
324        contents: &str,
325        mut load: impl FnMut(&str) -> Result<(PathBuf, String)>,
326    ) -> Result<Interface> {
327        Interface::_parse_with(
328            filename.as_ref(),
329            contents,
330            &mut load,
331            &mut HashSet::new(),
332            &mut HashMap::new(),
333        )
334    }
335
336    fn _parse_with(
337        filename: &Path,
338        contents: &str,
339        load: &mut dyn FnMut(&str) -> Result<(PathBuf, String)>,
340        visiting: &mut HashSet<PathBuf>,
341        map: &mut HashMap<String, Interface>,
342    ) -> Result<Interface> {
343        let mut name = filename.file_stem().unwrap();
344        let mut contents = contents;
345
346        // If we have a ".md" file, it's a wit file wrapped in a markdown file;
347        // parse the markdown to extract the `wit` code blocks.
348        let md_contents;
349        if filename.extension().and_then(|s| s.to_str()) == Some("md") {
350            md_contents = unwrap_md(contents);
351            contents = &md_contents[..];
352
353            // Also strip the inner ".wit" extension.
354            name = Path::new(name).file_stem().unwrap();
355        }
356
357        // Parse the `contents `into an AST
358        let ast = match ast::Ast::parse(contents) {
359            Ok(ast) => ast,
360            Err(mut e) => {
361                let file = filename.display().to_string();
362                ast::rewrite_error(&mut e, &file, contents);
363                return Err(e);
364            }
365        };
366
367        // Load up any modules into our `map` that have not yet been parsed.
368        if !visiting.insert(filename.to_path_buf()) {
369            bail!("file `{}` recursively imports itself", filename.display())
370        }
371        for item in ast.items.iter() {
372            let u = match item {
373                ast::Item::Use(u) => u,
374                _ => continue,
375            };
376            if map.contains_key(&*u.from[0].name) {
377                continue;
378            }
379            let (filename, contents) = load(&u.from[0].name)
380                // TODO: insert context here about `u.name.span` and `filename`
381                ?;
382            let instance = Interface::_parse_with(&filename, &contents, load, visiting, map)?;
383            map.insert(u.from[0].name.to_string(), instance);
384        }
385        visiting.remove(filename);
386
387        // and finally resolve everything into our final instance
388        match ast.resolve(name.to_str().unwrap(), map) {
389            Ok(i) => Ok(i),
390            Err(mut e) => {
391                let file = filename.display().to_string();
392                ast::rewrite_error(&mut e, &file, contents);
393                Err(e)
394            }
395        }
396    }
397
398    pub fn topological_types(&self) -> Vec<TypeId> {
399        let mut ret = Vec::new();
400        let mut visited = HashSet::new();
401        for (id, _) in self.types.iter() {
402            self.topo_visit(id, &mut ret, &mut visited);
403        }
404        ret
405    }
406
407    fn topo_visit(&self, id: TypeId, list: &mut Vec<TypeId>, visited: &mut HashSet<TypeId>) {
408        if !visited.insert(id) {
409            return;
410        }
411        match &self.types[id].kind {
412            TypeDefKind::Flags(_) | TypeDefKind::Enum(_) => {}
413            TypeDefKind::Type(t) | TypeDefKind::List(t) => self.topo_visit_ty(t, list, visited),
414            TypeDefKind::Record(r) => {
415                for f in r.fields.iter() {
416                    self.topo_visit_ty(&f.ty, list, visited);
417                }
418            }
419            TypeDefKind::Tuple(t) => {
420                for t in t.types.iter() {
421                    self.topo_visit_ty(t, list, visited);
422                }
423            }
424            TypeDefKind::Variant(v) => {
425                for v in v.cases.iter() {
426                    self.topo_visit_ty(&v.ty, list, visited);
427                }
428            }
429            TypeDefKind::Option(ty) => self.topo_visit_ty(ty, list, visited),
430            TypeDefKind::Expected(e) => {
431                self.topo_visit_ty(&e.ok, list, visited);
432                self.topo_visit_ty(&e.err, list, visited);
433            }
434            TypeDefKind::Union(u) => {
435                for t in u.cases.iter() {
436                    self.topo_visit_ty(&t.ty, list, visited);
437                }
438            }
439            TypeDefKind::Future(ty) => {
440                self.topo_visit_ty(ty, list, visited);
441            }
442            TypeDefKind::Stream(s) => {
443                self.topo_visit_ty(&s.element, list, visited);
444                self.topo_visit_ty(&s.end, list, visited);
445            }
446        }
447        list.push(id);
448    }
449
450    fn topo_visit_ty(&self, ty: &Type, list: &mut Vec<TypeId>, visited: &mut HashSet<TypeId>) {
451        if let Type::Id(id) = ty {
452            self.topo_visit(*id, list, visited);
453        }
454    }
455
456    pub fn all_bits_valid(&self, ty: &Type) -> bool {
457        match ty {
458            Type::Unit
459            | Type::U8
460            | Type::S8
461            | Type::U16
462            | Type::S16
463            | Type::U32
464            | Type::S32
465            | Type::U64
466            | Type::S64
467            | Type::Float32
468            | Type::Float64 => true,
469
470            Type::Bool | Type::Char | Type::Handle(_) | Type::String => false,
471
472            Type::Id(id) => match &self.types[*id].kind {
473                TypeDefKind::List(_)
474                | TypeDefKind::Variant(_)
475                | TypeDefKind::Enum(_)
476                | TypeDefKind::Option(_)
477                | TypeDefKind::Expected(_)
478                | TypeDefKind::Future(_)
479                | TypeDefKind::Stream(_)
480                | TypeDefKind::Union(_) => false,
481                TypeDefKind::Type(t) => self.all_bits_valid(t),
482                TypeDefKind::Record(r) => r.fields.iter().all(|f| self.all_bits_valid(&f.ty)),
483                TypeDefKind::Tuple(t) => t.types.iter().all(|t| self.all_bits_valid(t)),
484
485                // FIXME: this could perhaps be `true` for multiples-of-32 but
486                // seems better to probably leave this as unconditionally
487                // `false` for now, may want to reconsider later?
488                TypeDefKind::Flags(_) => false,
489            },
490        }
491    }
492
493    pub fn get_variant(&self, ty: &Type) -> Option<&Variant> {
494        if let Type::Id(id) = ty {
495            match &self.types[*id].kind {
496                TypeDefKind::Variant(v) => Some(v),
497                _ => None,
498            }
499        } else {
500            None
501        }
502    }
503}
504
505fn load_fs(root: &Path, name: &str) -> Result<(PathBuf, String)> {
506    let wit = root.join(name).with_extension("wit");
507
508    // Attempt to read a ".wit" file.
509    match fs::read_to_string(&wit) {
510        Ok(contents) => Ok((wit, contents)),
511
512        // If no such file was found, attempt to read a ".wit.md" file.
513        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
514            let wit_md = wit.with_extension("wit.md");
515            match fs::read_to_string(&wit_md) {
516                Ok(contents) => Ok((wit_md, contents)),
517                Err(_err) => Err(err.into()),
518            }
519        }
520
521        Err(err) => return Err(err.into()),
522    }
523}