use std::fs;
use std::io::{self, Read as _, Write as _};
use std::path::{Path, PathBuf};
use sha2::{Digest, Sha256};
use crate::chunk::{CachedChunk, Chunk};
use crate::compiler::CompilerOptions;
use crate::module_artifact::ModuleArtifact;
/// Eight-byte magic written at the very start of every cache file and checked on read.
pub const MAGIC: &[u8; 8] = b"HARNBC\0\0";
/// Header schema version; a file carrying a different value is treated as a cache miss.
pub const SCHEMA_VERSION: u32 = 2;
/// Crate version taken from `CARGO_PKG_VERSION` at build time; stored in the header and compared on read.
pub const HARN_VERSION: &str = env!("CARGO_PKG_VERSION");
/// File extension used for cached entry chunks.
pub const CACHE_EXTENSION: &str = "harnbc";
/// File extension used for cached module artifacts.
pub const MODULE_CACHE_EXTENSION: &str = "harnmod";
/// Header kind byte marking a payload that holds a serialized entry chunk.
const KIND_ENTRY_CHUNK: u8 = 1;
/// Header kind byte marking a payload that holds a serialized module artifact.
const KIND_MODULE_ARTIFACT: u8 = 2;
/// Environment variable that overrides the cache directory (read by `cache_dir`).
pub const CACHE_DIR_ENV: &str = "HARN_CACHE_DIR";
/// Environment variable that disables caching when set to `0`, `false`, `no` or `off`
/// (ASCII case-insensitive; read by `cache_enabled`).
pub const CACHE_ENABLED_ENV: &str = "HARN_BYTECODE_CACHE";
/// Result of an entry-chunk cache lookup performed by `load`.
pub struct LookupOutcome {
/// Key computed for the source; returned on hits and misses alike.
pub key: CacheKey,
/// The cached chunk on a hit; `None` on a miss or when caching is disabled.
pub chunk: Option<Chunk>,
}
/// Identity of a cache entry: the values a cache file's header must match
/// before its payload is reused.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CacheKey {
/// SHA-256 of the source text.
pub source_hash: [u8; 32],
/// Hash computed over the source's transitive user imports.
pub import_graph_hash: [u8; 32],
/// Crate version the key was built with.
pub harn_version: &'static str,
/// Bit flags derived from the compiler options in effect.
pub compiler_tag: u8,
}
impl CacheKey {
    /// Builds the key for `source`, whose file lives at `source_path`.
    ///
    /// The source text is hashed first, then the transitive user-import graph
    /// rooted at `source_path`, and finally the compiler options are read from
    /// the environment to derive the compiler tag.
    pub fn from_source(source_path: &Path, source: &str) -> Self {
        // Struct fields are evaluated in the order written, which keeps the
        // hashing / environment-read sequence stable.
        Self {
            source_hash: sha256(source.as_bytes()),
            import_graph_hash: hash_transitive_user_imports(source_path, source),
            harn_version: HARN_VERSION,
            compiler_tag: compiler_options_tag(CompilerOptions::from_env()),
        }
    }

    /// File name of the entry-chunk cache file: the hex source hash followed by
    /// `.` and [`CACHE_EXTENSION`].
    pub fn filename(&self) -> String {
        let mut name = hex(&self.source_hash);
        name.push('.');
        name.push_str(CACHE_EXTENSION);
        name
    }

    /// File name of the module-artifact cache file: the hex source hash followed
    /// by `.` and [`MODULE_CACHE_EXTENSION`].
    pub fn module_filename(&self) -> String {
        let mut name = hex(&self.source_hash);
        name.push('.');
        name.push_str(MODULE_CACHE_EXTENSION);
        name
    }
}
/// Returns the directory that holds shared cache files.
///
/// Resolution order:
/// 1. the `HARN_CACHE_DIR` override (`CACHE_DIR_ENV`), used as-is;
/// 2. `$XDG_CACHE_HOME/harn/bytecode`;
/// 3. `$HOME/.cache/harn/bytecode`;
/// 4. the relative path `.harn-cache/bytecode`.
///
/// A variable that is set but empty is treated as unset at every step, so an
/// empty override can no longer redirect the cache into the current directory.
pub fn cache_dir() -> PathBuf {
    /// Reads `name` from the environment, ignoring unset and empty values.
    fn non_empty_env(name: &str) -> Option<PathBuf> {
        std::env::var_os(name)
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
    }

    if let Some(custom) = non_empty_env(CACHE_DIR_ENV) {
        return custom;
    }
    if let Some(xdg) = non_empty_env("XDG_CACHE_HOME") {
        return xdg.join("harn").join("bytecode");
    }
    if let Some(home) = non_empty_env("HOME") {
        return home.join(".cache").join("harn").join("bytecode");
    }
    PathBuf::from(".harn-cache").join("bytecode")
}
/// Reports whether the cache may be used.
///
/// Caching is on unless the `CACHE_ENABLED_ENV` variable holds one of `0`,
/// `false`, `no` or `off` (compared ASCII case-insensitively). An unset or
/// non-Unicode value leaves caching enabled.
pub fn cache_enabled() -> bool {
    const DISABLING_VALUES: [&str; 4] = ["0", "false", "no", "off"];

    let Ok(raw) = std::env::var(CACHE_ENABLED_ENV) else {
        return true;
    };
    !DISABLING_VALUES
        .iter()
        .any(|candidate| raw.eq_ignore_ascii_case(candidate))
}
pub fn load(source_path: &Path, source: &str) -> LookupOutcome {
let key = CacheKey::from_source(source_path, source);
if !cache_enabled() {
return LookupOutcome { key, chunk: None };
}
let mut candidates: Vec<PathBuf> = Vec::with_capacity(2);
if let Some(adjacent) = adjacent_cache_path(source_path) {
candidates.push(adjacent);
}
candidates.push(cache_dir().join(key.filename()));
for path in candidates {
match read_chunk_if_matches(&path, &key) {
Ok(Some(chunk)) => {
return LookupOutcome {
key,
chunk: Some(chunk),
}
}
Ok(None) => continue,
Err(_) => continue,
}
}
LookupOutcome { key, chunk: None }
}
/// Writes `chunk` into the shared cache directory under the file name derived
/// from `key`, creating the directory if needed.
///
/// Does nothing and returns `Ok(())` when caching is disabled.
///
/// # Errors
/// Returns any I/O error from creating the directory or writing the file.
pub fn store(key: &CacheKey, chunk: &Chunk) -> io::Result<()> {
    if cache_enabled() {
        let root = cache_dir();
        fs::create_dir_all(&root)?;
        let target = root.join(key.filename());
        return write_atomic_chunk(&target, key, chunk);
    }
    Ok(())
}
/// Writes `chunk` to the explicit `path`, creating the parent directory first.
///
/// Unlike `store`, this does not consult `cache_enabled`.
///
/// # Errors
/// Returns any I/O error from creating the parent directory or writing the file.
pub fn store_at(path: &Path, key: &CacheKey, chunk: &Chunk) -> io::Result<()> {
    ensure_parent_dir(path).and_then(|()| write_atomic_chunk(path, key, chunk))
}
pub fn load_module(source_path: &Path, source: &str) -> ModuleLookupOutcome {
let key = CacheKey::from_source(source_path, source);
if !cache_enabled() {
return ModuleLookupOutcome {
key,
artifact: None,
};
}
let mut candidates: Vec<PathBuf> = Vec::with_capacity(2);
if let Some(adjacent) = adjacent_module_cache_path(source_path) {
candidates.push(adjacent);
}
candidates.push(cache_dir().join(key.module_filename()));
for path in candidates {
match read_module_if_matches(&path, &key) {
Ok(Some(artifact)) => {
return ModuleLookupOutcome {
key,
artifact: Some(artifact),
}
}
Ok(None) => continue,
Err(_) => continue,
}
}
ModuleLookupOutcome {
key,
artifact: None,
}
}
/// Writes `artifact` into the shared cache directory under the module file
/// name derived from `key`, creating the directory if needed.
///
/// Does nothing and returns `Ok(())` when caching is disabled.
///
/// # Errors
/// Returns any I/O error from creating the directory or writing the file.
pub fn store_module(key: &CacheKey, artifact: &ModuleArtifact) -> io::Result<()> {
    if cache_enabled() {
        let root = cache_dir();
        fs::create_dir_all(&root)?;
        let target = root.join(key.module_filename());
        return write_atomic_module(&target, key, artifact);
    }
    Ok(())
}
/// Writes `artifact` to the explicit `path`, creating the parent directory first.
///
/// Unlike `store_module`, this does not consult `cache_enabled`.
///
/// # Errors
/// Returns any I/O error from creating the parent directory or writing the file.
pub fn store_module_at(path: &Path, key: &CacheKey, artifact: &ModuleArtifact) -> io::Result<()> {
    ensure_parent_dir(path).and_then(|()| write_atomic_module(path, key, artifact))
}
/// Result of a module-artifact cache lookup performed by `load_module`.
pub struct ModuleLookupOutcome {
/// Key computed for the source; returned on hits and misses alike.
pub key: CacheKey,
/// The cached artifact on a hit; `None` on a miss or when caching is disabled.
pub artifact: Option<ModuleArtifact>,
}
/// Path of the entry-chunk cache file that sits next to `source_path`: the
/// source's file stem with the [`CACHE_EXTENSION`] extension, in the same
/// directory. Returns `None` when `source_path` has no usable file stem.
pub fn adjacent_cache_path(source_path: &Path) -> Option<PathBuf> {
    let extension = CACHE_EXTENSION;
    adjacent_path_with_extension(source_path, extension)
}
/// Path of the module-artifact cache file that sits next to `source_path`: the
/// source's file stem with the [`MODULE_CACHE_EXTENSION`] extension, in the
/// same directory. Returns `None` when `source_path` has no usable file stem.
pub fn adjacent_module_cache_path(source_path: &Path) -> Option<PathBuf> {
    let extension = MODULE_CACHE_EXTENSION;
    adjacent_path_with_extension(source_path, extension)
}
/// Builds the sibling path of `source_path` whose file name is the source's
/// file stem followed by `.` and `ext`.
///
/// Returns `None` when `source_path` has no file stem (for example `/` or a
/// path ending in `..`) or the stem is empty. An empty `ext` yields the bare
/// stem with no trailing dot.
///
/// The file name is assembled by hand instead of calling
/// `PathBuf::set_extension` on the stem: for a dotted stem such as `foo.test`
/// (from `foo.test.harn`) `set_extension` would replace `.test`, producing
/// `foo.<ext>` and colliding with the cache file of `foo.harn`.
fn adjacent_path_with_extension(source_path: &Path, ext: &str) -> Option<PathBuf> {
    let stem = source_path.file_stem()?;
    if stem.is_empty() {
        return None;
    }
    let mut file_name = stem.to_os_string();
    if !ext.is_empty() {
        file_name.push(".");
        file_name.push(ext);
    }
    // A bare file name has an empty parent; joining onto "" keeps it relative.
    let parent = source_path.parent().unwrap_or_else(|| Path::new(""));
    Some(parent.join(file_name))
}
/// Creates the parent directory of `path` (and any missing ancestors).
///
/// Nothing is created when `path` has no parent or the parent is the empty
/// path, as is the case for a bare file name.
///
/// # Errors
/// Returns the error reported by `fs::create_dir_all`.
fn ensure_parent_dir(path: &Path) -> io::Result<()> {
    match path.parent() {
        Some(dir) if !dir.as_os_str().is_empty() => fs::create_dir_all(dir),
        _ => Ok(()),
    }
}
/// Serializes the cache form of `chunk` with bincode and writes it to `target`
/// as an entry-chunk cache file.
///
/// # Errors
/// A serialization failure is reported as `io::ErrorKind::InvalidData`; I/O
/// errors from the write are passed through.
fn write_atomic_chunk(target: &Path, key: &CacheKey, chunk: &Chunk) -> io::Result<()> {
    match bincode::serialize(&chunk.freeze_for_cache()) {
        Ok(bytes) => write_atomic(target, key, KIND_ENTRY_CHUNK, &bytes),
        Err(err) => Err(io::Error::new(io::ErrorKind::InvalidData, err.to_string())),
    }
}
/// Serializes `artifact` with bincode and writes it to `target` as a
/// module-artifact cache file.
///
/// # Errors
/// A serialization failure is reported as `io::ErrorKind::InvalidData`; I/O
/// errors from the write are passed through.
fn write_atomic_module(target: &Path, key: &CacheKey, artifact: &ModuleArtifact) -> io::Result<()> {
    match bincode::serialize(artifact) {
        Ok(bytes) => write_atomic(target, key, KIND_MODULE_ARTIFACT, &bytes),
        Err(err) => Err(io::Error::new(io::ErrorKind::InvalidData, err.to_string())),
    }
}
/// Writes a cache file to `target` by staging it in a sibling temp file and
/// renaming it into place.
///
/// File layout, in order: `MAGIC` (8 bytes), `SCHEMA_VERSION` (u32 LE), the
/// length of `HARN_VERSION` (u32 LE), the version bytes, `key.compiler_tag`
/// (1 byte), `kind` (1 byte), `key.source_hash` (32 bytes),
/// `key.import_graph_hash` (32 bytes), then `payload`.
///
/// The temp file is named `.<target file name>.<pid>.tmp` and lives next to
/// `target`. It is synced before the rename.
///
/// # Errors
/// Returns the first I/O error from creating, writing, syncing or renaming the
/// temp file. Once the temp file has been created, any later failure removes
/// it again (best effort) so that failed writes do not leave stray `.tmp`
/// files behind.
fn write_atomic(target: &Path, key: &CacheKey, kind: u8, payload: &[u8]) -> io::Result<()> {
    let mut buf: Vec<u8> = Vec::with_capacity(payload.len() + 128);
    buf.extend_from_slice(MAGIC);
    buf.extend_from_slice(&SCHEMA_VERSION.to_le_bytes());
    let version_bytes = HARN_VERSION.as_bytes();
    // The crate version string is short; the reader rejects lengths above 256.
    buf.extend_from_slice(&(version_bytes.len() as u32).to_le_bytes());
    buf.extend_from_slice(version_bytes);
    buf.push(key.compiler_tag);
    buf.push(kind);
    buf.extend_from_slice(&key.source_hash);
    buf.extend_from_slice(&key.import_graph_hash);
    buf.extend_from_slice(payload);

    let tmp_name = match target.file_name() {
        Some(name) => format!(".{}.{}.tmp", name.to_string_lossy(), std::process::id(),),
        None => format!(".harn-cache.{}.tmp", std::process::id()),
    };
    let tmp_path = target.with_file_name(tmp_name);

    let mut tmp_file = fs::File::create(&tmp_path)?;
    let written = tmp_file
        .write_all(&buf)
        .and_then(|()| tmp_file.sync_all());
    // Close the handle before renaming or removing the file.
    drop(tmp_file);

    let outcome = written.and_then(|()| fs::rename(&tmp_path, target));
    if outcome.is_err() {
        // Best-effort cleanup; the original error is what the caller needs.
        let _ = fs::remove_file(&tmp_path);
    }
    outcome
}
/// What remains of a cache file once its header has been validated against a key.
struct ParsedHeader {
/// Kind byte read from the header; callers compare it with `KIND_ENTRY_CHUNK` / `KIND_MODULE_ARTIFACT`.
kind: u8,
/// All bytes that follow the header, still in serialized form.
payload: Vec<u8>,
}
/// Opens the cache file at `path` and validates its header against `key`.
///
/// Returns `Ok(Some(_))` carrying the kind byte and the raw payload when the
/// magic, schema version, crate version, compiler tag and both hashes all
/// match. Returns `Ok(None)` when the file does not exist, is truncated,
/// fails to read, or any header field differs.
///
/// # Errors
/// Only a failure to open the file other than `NotFound` is reported as an
/// error.
fn read_header_if_matches(path: &Path, key: &CacheKey) -> io::Result<Option<ParsedHeader>> {
    /// Parses an already-opened cache file; `None` means "treat as a miss".
    fn parse(file: &mut fs::File, key: &CacheKey) -> Option<ParsedHeader> {
        // Fixed prefix: magic (8) + schema version (4) + version length (4).
        let mut fixed = [0u8; 8 + 4 + 4];
        file.read_exact(&mut fixed).ok()?;
        if fixed[..8] != MAGIC[..] {
            return None;
        }
        let schema = u32::from_le_bytes([fixed[8], fixed[9], fixed[10], fixed[11]]);
        if schema != SCHEMA_VERSION {
            return None;
        }
        let version_len =
            u32::from_le_bytes([fixed[12], fixed[13], fixed[14], fixed[15]]) as usize;
        // Bound the allocation below before trusting a length read from disk.
        if version_len > 256 {
            return None;
        }

        let mut version = vec![0u8; version_len];
        file.read_exact(&mut version).ok()?;
        if version != key.harn_version.as_bytes() {
            return None;
        }

        let mut tag_and_kind = [0u8; 2];
        file.read_exact(&mut tag_and_kind).ok()?;
        let [compiler_tag, kind] = tag_and_kind;
        if compiler_tag != key.compiler_tag {
            return None;
        }

        let mut hashes = [0u8; 64];
        file.read_exact(&mut hashes).ok()?;
        let (source_hash, import_graph_hash) = hashes.split_at(32);
        if source_hash != &key.source_hash[..] || import_graph_hash != &key.import_graph_hash[..]
        {
            return None;
        }

        let mut payload = Vec::new();
        file.read_to_end(&mut payload).ok()?;
        Some(ParsedHeader { kind, payload })
    }

    match fs::File::open(path) {
        Ok(mut file) => Ok(parse(&mut file, key)),
        Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None),
        Err(err) => Err(err),
    }
}
/// Reads the entry chunk cached at `path` if its header matches `key`.
///
/// Returns `Ok(None)` when the header does not match, the file holds a
/// different kind of payload, or the payload fails to deserialize.
///
/// # Errors
/// Propagates errors from `read_header_if_matches`.
fn read_chunk_if_matches(path: &Path, key: &CacheKey) -> io::Result<Option<Chunk>> {
    let parsed = match read_header_if_matches(path, key)? {
        Some(parsed) if parsed.kind == KIND_ENTRY_CHUNK => parsed,
        _ => return Ok(None),
    };
    Ok(bincode::deserialize::<CachedChunk>(&parsed.payload)
        .ok()
        .map(|cached| Chunk::from_cached(&cached)))
}
/// Reads the module artifact cached at `path` if its header matches `key`.
///
/// Returns `Ok(None)` when the header does not match, the file holds a
/// different kind of payload, or the payload fails to deserialize.
///
/// # Errors
/// Propagates errors from `read_header_if_matches`.
fn read_module_if_matches(path: &Path, key: &CacheKey) -> io::Result<Option<ModuleArtifact>> {
    let parsed = match read_header_if_matches(path, key)? {
        Some(parsed) if parsed.kind == KIND_MODULE_ARTIFACT => parsed,
        _ => return Ok(None),
    };
    Ok(bincode::deserialize::<ModuleArtifact>(&parsed.payload).ok())
}
/// Packs the compiler options that affect generated code into one byte.
///
/// Bit 0 is set when optimizations are enabled; all other bits are zero.
fn compiler_options_tag(options: CompilerOptions) -> u8 {
    const OPTIMIZATIONS_BIT: u8 = 0b0000_0001;

    if options.optimizations_enabled() {
        OPTIMIZATIONS_BIT
    } else {
        0
    }
}
/// Returns the SHA-256 digest of `bytes` as a fixed 32-byte array.
fn sha256(bytes: &[u8]) -> [u8; 32] {
    let mut state = Sha256::new();
    state.update(bytes);
    let digest: [u8; 32] = state.finalize().into();
    digest
}
/// Renders `bytes` as lowercase hexadecimal, two digits per byte.
fn hex(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        out.push(DIGITS[usize::from(byte >> 4)] as char);
        out.push(DIGITS[usize::from(byte & 0x0f)] as char);
    }
    out
}
/// Scans `source` for `import` statements and returns the imported path
/// strings in order of appearance, skipping paths that start with `std/`.
///
/// Comments are stripped first and string literals outside import statements
/// are skipped, so an `import` that appears inside either is ignored. After
/// an `import` keyword the scan takes the first string literal it meets. It
/// gives up on that statement at a newline reached while not inside `{ ... }`.
/// Reaching the end of the text inside a statement ends the whole scan.
fn collect_user_imports(source: &str) -> Vec<String> {
    const KEYWORD: &[u8] = b"import";

    let scrubbed = strip_comments(source);
    let text = scrubbed.as_bytes();
    let mut imports: Vec<String> = Vec::new();
    let mut pos = 0;

    'scan: while pos < text.len() {
        if text[pos] == b'"' {
            // Skip a whole literal; an unterminated quote is stepped over.
            pos = match read_string_literal(text, pos) {
                Some((_, after)) => after,
                None => pos + 1,
            };
            continue;
        }
        if !matches_keyword(text, pos, KEYWORD) {
            pos += 1;
            continue;
        }

        let mut cursor = pos + KEYWORD.len();
        let mut brace_depth = 0i32;
        loop {
            let Some(&byte) = text.get(cursor) else {
                // Ran off the end while inside an import statement.
                break 'scan;
            };
            match byte {
                b'"' => {
                    if let Some((path, after)) = read_string_literal(text, cursor) {
                        if !path.starts_with("std/") {
                            imports.push(path);
                        }
                        pos = after;
                        continue 'scan;
                    }
                }
                b'{' => brace_depth += 1,
                b'}' => brace_depth -= 1,
                b'\n' if brace_depth == 0 => {
                    pos = cursor;
                    continue 'scan;
                }
                _ => {}
            }
            cursor += 1;
        }
    }
    imports
}
/// Reports whether `keyword` occurs in `bytes` at offset `at` as a whole word,
/// i.e. not immediately preceded or followed by an identifier character.
fn matches_keyword(bytes: &[u8], at: usize, keyword: &[u8]) -> bool {
    let end = at + keyword.len();
    let Some(candidate) = bytes.get(at..end) else {
        // The keyword would run past the end of the input.
        return false;
    };
    if candidate != keyword {
        return false;
    }
    let preceded_by_ident = at > 0 && is_ident_char(bytes[at - 1]);
    let followed_by_ident = bytes.get(end).map_or(false, |&next| is_ident_char(next));
    !(preceded_by_ident || followed_by_ident)
}
/// Reports whether `b` is an ASCII letter, an ASCII digit or an underscore.
fn is_ident_char(b: u8) -> bool {
    matches!(b, b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
}
/// Reads the double-quoted string literal that starts at `bytes[at]`.
///
/// Returns the decoded contents together with the index just past the closing
/// quote. Returns `None` when the literal is unterminated: a raw newline or
/// the end of input is reached first, or a backslash is the last byte.
///
/// Escapes `\n`, `\r` and `\t` are translated; any other escaped byte
/// (including `\"` and `\\`) stands for itself.
///
/// The contents are gathered as raw bytes and decoded as UTF-8 once at the
/// end. Casting each byte to `char` would turn every multi-byte character
/// into Latin-1 mojibake and corrupt non-ASCII import paths.
///
/// # Panics
/// Indexes `bytes[at]`, so `at` must be in bounds; in debug builds it must
/// also point at a `"`.
fn read_string_literal(bytes: &[u8], at: usize) -> Option<(String, usize)> {
    debug_assert_eq!(bytes[at], b'"');
    let mut raw: Vec<u8> = Vec::new();
    let mut i = at + 1;
    while i < bytes.len() {
        match bytes[i] {
            b'"' => return Some((String::from_utf8_lossy(&raw).into_owned(), i + 1)),
            b'\\' => {
                let escaped = *bytes.get(i + 1)?;
                raw.push(match escaped {
                    b'n' => b'\n',
                    b'r' => b'\r',
                    b't' => b'\t',
                    // Covers `\"`, `\\` and unknown escapes: keep the byte as-is.
                    other => other,
                });
                i += 2;
            }
            b'\n' => return None,
            byte => {
                raw.push(byte);
                i += 1;
            }
        }
    }
    None
}
/// Returns `source` with `//` line comments and `/* ... */` block comments
/// removed.
///
/// The newline that ends a line comment is kept. String literals are copied
/// through untouched, so comment markers inside them are not treated as
/// comments. An unterminated block comment swallows the rest of the input.
///
/// Kept text is copied as slices of `source` rather than byte by byte. Casting
/// single bytes to `char` would mangle every non-ASCII character outside a
/// string literal.
fn strip_comments(source: &str) -> String {
    let bytes = source.as_bytes();
    let mut out = String::with_capacity(source.len());
    // Start of the pending run of text that will be copied verbatim. Every
    // slice boundary below sits next to an ASCII byte (`/`, `*`, `"`, `\n`)
    // or at the end of input, so it is always a valid char boundary.
    let mut keep_from = 0;
    let mut i = 0;
    while i < bytes.len() {
        match (bytes[i], bytes.get(i + 1).copied()) {
            (b'/', Some(b'/')) => {
                out.push_str(&source[keep_from..i]);
                while i < bytes.len() && bytes[i] != b'\n' {
                    i += 1;
                }
                keep_from = i;
            }
            (b'/', Some(b'*')) => {
                out.push_str(&source[keep_from..i]);
                i += 2;
                while i + 1 < bytes.len() && !(bytes[i] == b'*' && bytes[i + 1] == b'/') {
                    i += 1;
                }
                i = (i + 2).min(bytes.len());
                keep_from = i;
            }
            (b'"', _) => {
                // Jump over a complete literal; a stray quote is kept as a
                // single ordinary character.
                i = match read_string_literal(bytes, i) {
                    Some((_, end)) => end,
                    None => i + 1,
                };
            }
            _ => i += 1,
        }
    }
    out.push_str(&source[keep_from..]);
    out
}
/// Hashes the transitive closure of user imports reachable from `source`.
///
/// Starting from the imports found in `source` (anchored at `source_path`),
/// each import is resolved with `harn_modules::resolve_import_path`, read,
/// and scanned for further imports. Every visited path is recorded once,
/// keyed by its canonical path when canonicalization succeeds, as one of:
///
/// * resolved, with the file's contents;
/// * unresolved, with the import string, under a sentinel key built from the
///   importing file's path;
/// * I/O error, with the error kind rendered as text.
///
/// The records live in a `BTreeMap`, so they are fed to SHA-256 in path order
/// and the result does not depend on the order imports were discovered in.
fn hash_transitive_user_imports(source_path: &Path, source: &str) -> [u8; 32] {
    let mut visited: std::collections::BTreeMap<PathBuf, ImportNode> =
        std::collections::BTreeMap::new();
    let mut frontier: Vec<(PathBuf, String)> = collect_user_imports(source)
        .into_iter()
        .map(|import| (source_path.to_path_buf(), import))
        .collect();

    while let Some((anchor, import)) = frontier.pop() {
        let Some(resolved) = harn_modules::resolve_import_path(&anchor, &import) else {
            let sentinel = anchor.join(format!("__unresolved__/{import}"));
            visited
                .entry(sentinel)
                .or_insert(ImportNode::Unresolved { import });
            continue;
        };
        let canonical = resolved.canonicalize().unwrap_or_else(|_| resolved.clone());
        if visited.contains_key(&canonical) {
            continue;
        }
        let node = match fs::read_to_string(&resolved) {
            Ok(content) => {
                // Queue nested imports anchored at the file that declared them.
                frontier.extend(
                    collect_user_imports(&content)
                        .into_iter()
                        .map(|nested_import| (resolved.clone(), nested_import)),
                );
                // The contents are moved into the node; no copy is needed.
                ImportNode::Resolved { content }
            }
            Err(err) => ImportNode::IoError {
                kind: err.kind().to_string(),
            },
        };
        visited.insert(canonical, node);
    }

    let mut hasher = Sha256::new();
    for (path, node) in &visited {
        hasher.update(path.to_string_lossy().as_bytes());
        hasher.update(b"\0");
        match node {
            ImportNode::Resolved { content } => {
                hasher.update(b"resolved\0");
                hasher.update(content.as_bytes());
            }
            ImportNode::Unresolved { import } => {
                hasher.update(b"unresolved\0");
                hasher.update(import.as_bytes());
            }
            ImportNode::IoError { kind } => {
                hasher.update(b"ioerror\0");
                hasher.update(kind.as_bytes());
            }
        }
        hasher.update(b"\0");
    }
    hasher.finalize().into()
}
/// What `hash_transitive_user_imports` recorded for one visited path.
enum ImportNode {
/// The import resolved to a file whose contents were read successfully.
Resolved { content: String },
/// The import string could not be resolved to a path.
Unresolved { import: String },
/// The import resolved, but reading the file failed; `kind` is the `io::ErrorKind` rendered as text.
IoError { kind: String },
}
// Unit tests for the cache header format, the import scanner and the
// import-graph hash.
#[cfg(test)]
mod tests {
use super::*;
use crate::compile_source;
// A chunk written with `store_at` can be read back with the same key.
#[test]
fn header_round_trips_chunk() {
let chunk = compile_source("println(\"hello\")").expect("compile");
let key = CacheKey::from_source(Path::new("/tmp/example.harn"), "println(\"hello\")");
let tmp = tempfile::tempdir().unwrap();
let path = tmp.path().join("entry.harnbc");
store_at(&path, &key, &chunk).expect("write");
let loaded = read_chunk_if_matches(&path, &key).unwrap();
assert!(loaded.is_some(), "expected cached chunk to load");
}
// A key with a different source hash must not load the stored chunk.
#[test]
fn header_mismatch_returns_none() {
let chunk = compile_source("1 + 1").expect("compile");
let key = CacheKey::from_source(Path::new("/tmp/a.harn"), "1 + 1");
let tmp = tempfile::tempdir().unwrap();
let path = tmp.path().join("a.harnbc");
store_at(&path, &key, &chunk).expect("write");
// Same as `key` except for the source hash.
let other = CacheKey {
source_hash: [0xAB; 32],
import_graph_hash: key.import_graph_hash,
harn_version: HARN_VERSION,
compiler_tag: key.compiler_tag,
};
assert!(read_chunk_if_matches(&path, &other).unwrap().is_none());
}
// A key with a different compiler tag must not load the stored chunk.
#[test]
fn compiler_tag_mismatch_returns_none() {
let chunk = compile_source("1 + 1").expect("compile");
let key = CacheKey::from_source(Path::new("/tmp/b.harn"), "1 + 1");
let tmp = tempfile::tempdir().unwrap();
let path = tmp.path().join("b.harnbc");
store_at(&path, &key, &chunk).expect("write");
// Flip every bit of the tag so it cannot equal the stored one.
let other = CacheKey {
compiler_tag: key.compiler_tag ^ 0xFF,
..key.clone()
};
assert!(
read_chunk_if_matches(&path, &other).unwrap().is_none(),
"flipped HARN_DISABLE_OPTIMIZATIONS must not reuse a chunk \
compiled under the opposite setting"
);
}
// Commented-out imports and `std/` imports are skipped; the remaining
// imports are returned in source order.
#[test]
fn collect_user_imports_ignores_stdlib_and_comments() {
let source = r#"
// import "comment/should/be/ignored"
import "std/agents"
import { foo } from "pkg/bar"
import "./relative/path"
"#;
let imports = collect_user_imports(source);
assert_eq!(
imports,
vec!["pkg/bar".to_string(), "./relative/path".to_string()]
);
}
// `cache_enabled` follows the `CACHE_ENABLED_ENV` variable: "0" disables,
// "1" or an unset variable enables.
// NOTE(review): this mutates the process environment and leaves the variable
// unset afterwards — confirm no other test depends on it.
#[test]
fn cache_enabled_respects_env() {
std::env::set_var(CACHE_ENABLED_ENV, "0");
assert!(!cache_enabled());
std::env::set_var(CACHE_ENABLED_ENV, "1");
assert!(cache_enabled());
std::env::remove_var(CACHE_ENABLED_ENV);
assert!(cache_enabled());
}
// Text that looks like an import but sits inside a string literal is ignored.
#[test]
fn import_path_inside_string_literal_is_ignored() {
let source = r#"
let payload = "import { foo } from \"./other\""
import "./real"
"#;
let imports = collect_user_imports(source);
assert_eq!(imports, vec!["./real".to_string()]);
}
// Two entry files that import the same modules in a different order must
// produce the same import-graph hash.
#[test]
fn import_hash_is_stable_across_import_order() {
let tmp = tempfile::tempdir().unwrap();
std::fs::write(
tmp.path().join("a.harn"),
"pub fn a() -> int { return 1 }\n",
)
.unwrap();
std::fs::write(
tmp.path().join("b.harn"),
"pub fn b() -> int { return 2 }\n",
)
.unwrap();
let ab = tmp.path().join("entry_ab.harn");
std::fs::write(&ab, "import \"./a\"\nimport \"./b\"\nprintln(\"hi\")\n").unwrap();
let ba = tmp.path().join("entry_ba.harn");
std::fs::write(&ba, "import \"./b\"\nimport \"./a\"\nprintln(\"hi\")\n").unwrap();
let hash_ab = hash_transitive_user_imports(&ab, &std::fs::read_to_string(&ab).unwrap());
let hash_ba = hash_transitive_user_imports(&ba, &std::fs::read_to_string(&ba).unwrap());
assert_eq!(
hash_ab, hash_ba,
"import-graph hash must be order-independent so reordering imports \
does not bust the cache"
);
}
// Editing a file that is only reached through another import (entry -> mid
// -> leaf) must change the import-graph hash of the entry file.
#[test]
fn import_hash_picks_up_nested_imports() {
let tmp = tempfile::tempdir().unwrap();
std::fs::write(
tmp.path().join("leaf.harn"),
"pub fn x() -> int { return 1 }\n",
)
.unwrap();
std::fs::write(
tmp.path().join("mid.harn"),
"import \"./leaf\"\npub fn y() -> int { return 2 }\n",
)
.unwrap();
let entry = tmp.path().join("entry.harn");
std::fs::write(&entry, "import \"./mid\"\nprintln(\"hi\")\n").unwrap();
let before =
hash_transitive_user_imports(&entry, &std::fs::read_to_string(&entry).unwrap());
// Rewrite only the leaf; the entry and mid files stay untouched.
std::fs::write(
tmp.path().join("leaf.harn"),
"pub fn x() -> int { return 999 }\n",
)
.unwrap();
let after = hash_transitive_user_imports(&entry, &std::fs::read_to_string(&entry).unwrap());
assert_ne!(
before, after,
"editing a transitively-imported file must change the import-graph hash"
);
}
}