blr-lang 0.1.0

A language implementation that provides type-safe dataframes
Documentation
//! Implementation of the blr compiler.
//!
//! The general architecture is query-based, meaning instead of a rigid sequence of steps the
//! compiler queries (and memoizes in a database) the needed state for each step of the
//! compilation.
//!
//! The compiler has several intermediate representations, each one closer to the final WASM
//! target. To keep the distinction between these representations clear, each one is named after
//! a layer of a rocky planet, communicating that each layer is lower than the previous one but
//! that the name is otherwise arbitrary.
//!
//! These layers are in order:
//!
//! * [`air`]
//! * [`crust`]
//! * [`mantle`]
//! * [`core`]
//! * [`nucleus`]
//!
//! Each layer has its own expression type and intermediate language specialized for the purpose
//! of that particular layer.
//!
//!
//! Blr is designed to easily interoperate with WASM components from other host languages.
//! In the compiler `native` refers to code written in blr and compiled by this compiler while
//! `external` refers to WASM components compiled by other languages and compilers.

mod air;
mod core;
pub mod crust;
mod mantle;
pub mod nucleus;
pub mod sexpr;

use std::{collections::HashMap, path::PathBuf};

use codespan_reporting::{
    diagnostic::{Diagnostic, Label},
    files::SimpleFiles,
    term::{self, termcolor::Buffer},
};
use lalrpop_util::ParseError;
use snafu::ResultExt as _;
use tokio::fs;
use tracing::{Level, debug, enabled, instrument};

use crate::{
    Error, IOSnafu, Result, TypeSnafu,
    compiler::nucleus::SharedRuntime,
    debug_alt,
    runtime::{
        binary::register_binary_operator_imports, conv::register_conversion_imports,
        register_runtime,
    },
};

pub use crust::{ItemSource, TypeError};

/// State shared by the compiler queries in this module.
///
/// Every query takes `&mut Database`, reading its inputs from these fields and, for some
/// queries, caching results back into them.
pub struct Database {
    /// Directory that non-`main` module paths are joined onto when loading source files.
    path_root: PathBuf,
    /// Source text returned for the `main` module.
    root_src: String,
    /// Base item source; cloned as the starting point for each module's own item source.
    item_source: ItemSource,
    /// Per-module item sources, keyed by blr module path.
    item_sources: HashMap<String, ItemSource>,
    /// Runtime state handed to WASM core module emission and runtime registration.
    shared_runtime: SharedRuntime,
}
impl Database {
    pub fn new(path_root: impl Into<PathBuf>, root_src: String, item_source: ItemSource) -> Self {
        Self {
            path_root: path_root.into(),
            root_src,
            item_source,
            item_sources: Default::default(),
            shared_runtime: Default::default(),
        }
    }
}

async fn snapshot(path: &str, name: &str, format_fn: impl FnOnce() -> String) {
    if let Ok(root) = std::env::var("BLR_SNAPSHOT") {
        let module_path = path.replace("::", "/");
        let dir = PathBuf::from("snapshots").join(root).join(module_path);
        let _ = fs::create_dir_all(&dir).await;
        let content = format_fn();
        let file_path = dir.join(name);
        let _ = fs::write(&file_path, content).await;
        debug!(file_path = %file_path.display(), "snapshot saved");
    }
}

#[instrument(skip(db))]
pub async fn root_component(db: &mut Database) -> Result<Vec<u8>> {
    let path = "main";
    let main_mod = nucleus_module(db, path, &Default::default()).await?;
    let mut modules = Vec::new();

    let imports = imports(db, path).await?;
    modules.push((path.to_string(), blr_path_to_component_path(path), main_mod));
    let instances = instance_deps(db, path).await?;
    // TODO recurse into transitive imports
    for import in imports {
        let instances = instances
            .iter()
            .filter(|(symbol, _types)| symbol.module == import.path)
            .map(|(symbol, types)| (symbol.field.clone(), types.clone()))
            .collect();
        modules.push((
            import.path.clone(),
            blr_path_to_component_path(&import.path),
            Box::pin(nucleus_module(db, &import.path, &instances)).await?,
        ));
    }
    // TODO: Prune these if they are not needed
    register_binary_operator_imports(&mut modules);
    register_conversion_imports(&mut modules);

    register_runtime(shared_runtime(db).data_size(), &mut modules);
    // Reverse modules so all dependent modules come first
    modules.reverse();

    let (data, externals) = nucleus::component::emit_root_component(modules);
    if enabled!(Level::DEBUG) {
        let mut wat = wasmprinter::PrintFmtWrite(String::new());
        let mut config = wasmprinter::Config::new();
        config
            .fold_instructions(true)
            .print_offsets(true)
            .indent_text("  ")
            .print(&data, &mut wat)
            .expect("Printing WAT failed");
        debug_alt!(
            component_wat = crate::debug::RawString(&wat.0),
            "component wat"
        );
    }
    snapshot(path, "root.wat", || {
        let mut wat = wasmprinter::PrintFmtWrite(String::new());
        let mut config = wasmprinter::Config::new();
        config
            .fold_instructions(true)
            .print_offsets(false)
            .indent_text("  ")
            .print(&data, &mut wat)
            .expect("Printing WAT failed");
        wat.0
    })
    .await;

    // TODO: Re add linking logic with formal BLR_LD_PATH or similar
    // As of now all linked external components come from blr-std
    let _ = externals;

    Ok(data)
}

#[instrument(skip(db))]
pub fn shared_runtime(db: &mut Database) -> &mut SharedRuntime {
    &mut db.shared_runtime
}

/// Emits the WASM core module for the blr module at `path`.
///
/// `item_instances` is forwarded unchanged to [`core_module`]. The emitted module is also
/// rendered as WAT for the `nucleus.wat` snapshot and the debug log.
#[instrument(skip(db))]
pub async fn nucleus_module(
    db: &mut Database,
    path: &str,
    item_instances: &HashMap<String, Vec<mantle::Type>>,
) -> Result<nucleus::Module> {
    let core_mod = core_module(db, path, item_instances).await?;
    let module = nucleus::emit_wasm_core_module(core_mod, shared_runtime(db));

    let mut printer = wasmprinter::PrintFmtWrite(String::new());
    let mut printer_config = wasmprinter::Config::new();
    printer_config
        .fold_instructions(true)
        .indent_text("  ")
        .print(&module.module, &mut printer)
        .expect("Printing WAT failed");
    let wat_content = printer.0;

    // The closure clones because the text is still needed for the debug log below.
    snapshot(path, "nucleus.wat", || wat_content.clone()).await;
    debug_alt!(
        module_wat = crate::debug::RawString(&wat_content),
        exports = module.exports,
        externals = module.externals,
        "module wat"
    );
    Ok(module)
}
/// Closure-converts the monomorphized mantle module at `path` into a core module.
///
/// `item_instances` is forwarded unchanged to [`monomorph_mantle_module`].
#[instrument(skip(db))]
pub async fn core_module(
    db: &mut Database,
    path: &str,
    item_instances: &HashMap<String, Vec<mantle::Type>>,
) -> Result<core::Module> {
    let monomorphized = monomorph_mantle_module(db, path, item_instances).await?;
    let core_mod = core::closure_convert(monomorphized);
    snapshot(path, "core_module.sexpr", || format!("{:?}", core_mod)).await;
    debug_alt!(core_mod, "core module");
    Ok(core_mod)
}
/// Monomorphizes the simplified mantle module at `path`.
///
/// `item_instances` is passed straight to `mantle::monomorph_module` together with the
/// simplified module.
#[instrument(skip(db))]
pub async fn monomorph_mantle_module(
    db: &mut Database,
    path: &str,
    item_instances: &HashMap<String, Vec<mantle::Type>>,
) -> Result<mantle::Module> {
    let simplified = simplify_mantle_module(db, path).await?;
    let module = mantle::monomorph_module(simplified, item_instances);
    snapshot(path, "mantle_monomorph.sexpr", || format!("{:?}", module)).await;
    debug_alt!(module, "monomorph");
    Ok(module)
}

/// Returns the `instances` map of the module at `path`, monomorphized with an empty set of
/// requested item instances.
#[instrument(skip(db))]
pub async fn instance_deps(
    db: &mut Database,
    path: &str,
) -> Result<HashMap<mantle::Symbol, Vec<mantle::Type>>> {
    let monomorphized = monomorph_mantle_module(db, path, &HashMap::new()).await?;
    let instances = monomorphized.instances;
    debug_alt!(instances, "instance deps");
    Ok(instances)
}
/// Runs `mantle::simplify_module` over the lowered mantle module at `path`.
#[instrument(skip(db))]
pub async fn simplify_mantle_module(db: &mut Database, path: &str) -> Result<mantle::Module> {
    let lowered = mantle_module(db, path).await?;
    let module = mantle::simplify_module(lowered);
    snapshot(path, "mantle_simplified.sexpr", || format!("{:?}", module)).await;
    debug_alt!(module, "simplified");
    Ok(module)
}
/// Lowers the typed crust module at `path` into a mantle module.
#[instrument(skip(db))]
pub async fn mantle_module(db: &mut Database, path: &str) -> Result<mantle::Module> {
    // Type the module first: `typed_mod` writes the module's item source back into the cache,
    // and that is what the lookup below then reads.
    let typed = typed_mod(db, path).await?;
    let item_source = item_source_for_mod(db, path).await?;
    let module = mantle::lower_module(&item_source, typed);
    snapshot(path, "mantle_lowered.sexpr", || format!("{:?}", module)).await;
    debug_alt!(mantle_module = module, "mantle::lower");
    Ok(module)
}

/// Returns the item source for the module at `path`.
///
/// A cached entry is returned as a clone. Otherwise the base item source is cloned, every item
/// of every module imported by `path` is registered in it under the import's module path, and
/// the result is stored in the cache.
#[instrument(skip(db))]
pub async fn item_source_for_mod(db: &mut Database, path: &str) -> Result<ItemSource> {
    let cached = db.item_sources.get(path).cloned();
    let item_source = match cached {
        Some(hit) => hit,
        None => {
            let mut fresh = db.item_source.clone();
            for import in imports(db, path).await? {
                let dep = Box::pin(typed_mod(db, &import.path)).await?;
                for (_item_id, item) in dep.items {
                    // Native and external items are registered the same way.
                    let (field, scheme) = match item {
                        crust::TypedItem::Native(crust::TypedNativeItem {
                            symbol, scheme, ..
                        })
                        | crust::TypedItem::External(crust::ExternalItem {
                            symbol, scheme, ..
                        }) => (symbol.field, scheme),
                    };
                    fresh.register(
                        crust::Symbol {
                            module: import.path.clone(),
                            field,
                        },
                        scheme,
                    );
                }
            }
            let stored = db.item_sources.entry(path.to_string()).or_insert(fresh);
            stored.clone()
        }
    };
    debug_alt!(item_source, "item_source");
    Ok(item_source)
}
/// Parses, lowers to crust, and type-infers the module at `path`.
///
/// Type inference failures are returned through the `TypeSnafu` context.
#[instrument(skip(db))]
pub async fn typed_mod(db: &mut Database, path: &str) -> Result<crust::TypedModule> {
    let parsed_mod = parsed(db, path).await?;
    let mut item_source = item_source_for_mod(db, path).await?;
    let component_path = blr_path_to_component_path(path);
    let crust_mod = crust::lower(parsed_mod, &mut item_source, &component_path);
    snapshot(path, "crust_lowered.sexpr", || format!("{:?}", crust_mod)).await;
    // HACK item_source should not be mutable, we store it back in the cache here because we
    // mutated it.
    let cache_key = path.to_string();
    db.item_sources.insert(cache_key, item_source.clone());
    debug_alt!(crust_mod, "crust");
    let typed_mod = crust::type_infer_with_items(item_source, crust_mod).context(TypeSnafu {})?;
    snapshot(path, "crust_typed.sexpr", || format!("{:?}", typed_mod)).await;
    debug_alt!(typed_mod, "typed crust");
    Ok(typed_mod)
}
/// Returns the imports declared by the module at `path`, taken from its parsed form.
#[instrument(skip(db))]
pub async fn imports(db: &mut Database, path: &str) -> Result<Vec<air::Import>> {
    // TODO memoize
    let parsed_mod = parsed(db, path).await?;
    let imports = parsed_mod.imports;
    debug_alt!(imports, "imports");
    Ok(imports)
}
/// Loads the source of the module at `path` and parses it into an air module.
#[instrument(skip(db))]
pub async fn parsed(db: &mut Database, path: &str) -> Result<air::Module> {
    let text = source(db, path).await?;
    let module = parse(text.as_str())?;
    snapshot(path, "parsed.sexpr", || format!("{:?}", module)).await;
    debug_alt!(module, "parsed");
    Ok(module)
}

/// Returns the source text of the module at `path`.
///
/// `main` yields the root source held by the database. Any other path is split on `::` and
/// appended to the database's root directory, with a `std` segment mapped to `stdlib/src`, and
/// the resulting file (extension `blr`) is read from disk.
///
/// Read failures are returned through the `IOSnafu` context, carrying the file path.
#[instrument(skip(db))]
pub async fn source(db: &mut Database, path: &str) -> Result<String> {
    if path == "main" {
        return Ok(db.root_src.clone());
    }

    let mut file_path = db.path_root.clone();
    for part in path.split("::") {
        if part == "std" {
            file_path.push("stdlib");
            file_path.push("src");
        } else {
            file_path.push(part);
        }
    }
    file_path.set_extension("blr");

    debug!(path = file_path.display().to_string(), cwd=?std::env::current_dir(), "source path");
    let contents = fs::read_to_string(&file_path).await;
    contents.context(IOSnafu {
        path: file_path.display().to_string(),
    })
}

// TODO make proper query
#[instrument(skip(src), ret(level = Level::TRACE))]
pub fn parse(src: &str) -> Result<air::Module> {
    air::parser::ModuleParser::new().parse(src).map_err(|err| {
        let mut files = SimpleFiles::new();
        let file_id = files.add("stdin.blr", &src);
        let mut diagnostic = Diagnostic::error();
        match err {
            ParseError::InvalidToken { location } => {
                diagnostic = diagnostic
                    .with_message("invalid token")
                    .with_labels(vec![Label::primary(file_id, location..location)]);
            }
            ParseError::UnrecognizedEof { location, expected } => {
                diagnostic = diagnostic
                    .with_message("unrecognized EOF")
                    .with_labels(vec![Label::primary(file_id, location..location)])
                    .with_notes(vec![format!("expected one of {}", expected.join(","))]);
            }
            ParseError::UnrecognizedToken { token, expected } => {
                diagnostic = diagnostic
                    .with_message("unrecognized token")
                    .with_labels(vec![Label::primary(file_id, token.0..token.2)])
                    .with_notes(vec![format!("expected one of {}", expected.join(" "))]);
            }
            ParseError::ExtraToken { token } => {
                diagnostic = diagnostic
                    .with_message("extra token")
                    .with_labels(vec![Label::primary(file_id, token.0..token.2)]);
            }
            ParseError::User { error } => {
                diagnostic = diagnostic.with_message(error);
            }
        };
        let mut writer = Buffer::no_color();
        let config = codespan_reporting::term::Config::default();
        term::emit(&mut writer, &config, &files, &diagnostic).unwrap();
        let message = String::from_utf8(writer.into_inner()).unwrap();
        Error::Parse { message }
    })
}

//HACK need a real system here to map blr module paths to/from wasm component package/interface
//paths
/// Maps a blr module path to a component interface path of the form
/// `blr:<name>/<name>-interface`, where `<name>` is `path` with one leading `std::` removed.
fn blr_path_to_component_path(path: &str) -> String {
    let name = match path.strip_prefix("std::") {
        Some(rest) => rest,
        None => path,
    };
    format!("blr:{0}/{0}-interface", name)
}