use std::path::{Path, PathBuf};
use anyhow::{bail, Context};
use bytesize::ByteSize;
use rusqlite::{params, OptionalExtension};
use sysinfo::{CpuRefreshKind, RefreshKind};
use crate::corpus::{Engine, Run};
pub(crate) type Id = u32;
pub(crate) struct Repo {
pub(crate) id: Id,
pub(crate) path: PathBuf,
pub(crate) odb_size: ByteSize,
pub(crate) num_objects: u64,
pub(crate) num_references: usize,
}
impl Repo {
pub(crate) fn try_from(repo: &gix::Repository) -> anyhow::Result<Self> {
let num_references = repo.refs.iter()?.all()?.count();
let num_objects = repo.objects.packed_object_count()?;
let odb_size = ByteSize(
std::fs::read_dir(repo.objects.store_ref().path().join("pack"))
.map(|dir| {
dir.filter_map(Result::ok)
.filter_map(|e| e.metadata().ok())
.filter_map(|m| m.is_file().then_some(m.len()))
.sum()
})
.unwrap_or_default(),
);
Ok(Repo {
id: 0,
path: repo.path().to_owned(),
odb_size,
num_objects,
num_references,
})
}
}
const VERSION: usize = 1;
pub fn create(path: impl AsRef<std::path::Path>) -> anyhow::Result<rusqlite::Connection> {
let path = path.as_ref();
let con = rusqlite::Connection::open(path)?;
let meta_table = r#"
CREATE TABLE if not exists meta(
version int
)"#;
con.execute_batch(meta_table)?;
let version: Option<usize> = con.query_row("SELECT version FROM meta", [], |r| r.get(0)).optional()?;
match version {
None => {
con.execute("INSERT into meta(version) values(?)", params![VERSION])?;
}
Some(version) if version != VERSION => match con.close() {
Ok(()) => {
bail!("Cannot handle database with version {version}, cannot yet migrate to {VERSION} - maybe migrate by hand?");
}
Err((_, err)) => return Err(err.into()),
},
_ => {}
}
con.execute_batch("PRAGMA synchronous = OFF; PRAGMA journal_mode = WAL; PRAGMA wal_checkpoint(FULL); ")?;
con.execute_batch(
r#"
CREATE TABLE if not exists runner(
id integer PRIMARY KEY,
vendor text,
brand text,
host_name text, -- this is just to help ID the runner
UNIQUE (vendor, brand)
)
"#,
)?;
con.execute_batch(
r#"
CREATE TABLE if not exists corpus(
id integer PRIMARY KEY,
root text UNIQUE -- the root path of all repositories we want to consider, as canonicalized path
)
"#,
)?;
con.execute_batch(
r"
CREATE TABLE if not exists repository(
id integer PRIMARY KEY,
rela_path text, -- the path to the repository on disk, relative to the corpus root path, without leading `./` or `.\`
corpus integer,
odb_size integer, -- the object database size in bytes
num_references integer, -- the total amount of references
num_objects integer, -- the total amount of objects
FOREIGN KEY (corpus) REFERENCES corpus (id)
UNIQUE (rela_path, corpus)
)
",
)?;
con.execute_batch(
r#"
CREATE TABLE if not exists gitoxide_version(
id integer PRIMARY KEY,
version text UNIQUE -- the unique git version via gix describe
)
"#,
)?;
con.execute_batch(
r#"
CREATE TABLE if not exists task(
id integer PRIMARY KEY,
short_name UNIQUE, -- the unique and permanent identifier for the task
description text UNIQUE -- the descriptive name of the task, it can be changed at will
)
"#,
)?;
con.execute_batch(
r#"
CREATE TABLE if not exists run(
id integer PRIMARY KEY,
repository integer,
runner integer,
task integer,
gitoxide_version integer,
insertion_time integer NOT NULL, -- in seconds since UNIX epoch
duration real, -- in seconds or NULL if not yet finished (either successful or with failure)
error text, -- or NULL if there was no error
spans_json text, -- all spans collecteted while performing the run
FOREIGN KEY (repository) REFERENCES repository (id),
FOREIGN KEY (task) REFERENCES task (id),
FOREIGN KEY (runner) REFERENCES runner (id),
FOREIGN KEY (gitoxide_version) REFERENCES gitoxide_version (id)
)
"#,
)?;
Ok(con)
}
impl Engine {
pub(crate) fn runner_id_or_insert(&self) -> anyhow::Result<Id> {
let sys = sysinfo::System::new_with_specifics(
RefreshKind::nothing().with_cpu(CpuRefreshKind::nothing().with_frequency()),
);
let cpu = &sys.cpus()[0];
let vendor = Some(cpu.vendor_id().to_owned());
let host = sysinfo::System::host_name();
let brand = Some(cpu.brand().to_owned());
Ok(self.con.query_row(
"INSERT INTO runner (vendor, brand, host_name) VALUES (?1, ?2, ?3) \
ON CONFLICT DO UPDATE SET vendor = vendor, brand = brand, host_name = ?3 RETURNING id",
[vendor.as_deref(), brand.as_deref(), host.as_deref()],
|r| r.get(0),
)?)
}
pub(crate) fn corpus_id_or_insert(&self, path: &Path) -> anyhow::Result<Id> {
let path = path.to_str().context("corpus root cannot contain illformed UTF-8")?;
Ok(self.con.query_row(
"INSERT INTO corpus (root) VALUES (?1) \
ON CONFLICT DO UPDATE SET root = root RETURNING id",
[path],
|r| r.get(0),
)?)
}
pub(crate) fn gitoxide_version_id_or_insert(&self) -> anyhow::Result<Id> {
Ok(self
.con
.query_row(
"INSERT INTO gitoxide_version (version) VALUES (?1) ON CONFLICT DO UPDATE SET version = version RETURNING id",
[&self.state.gitoxide_version],
|r| r.get(0),
)?)
}
pub(crate) fn tasks_or_insert(
&self,
allowed_short_names: &[String],
) -> anyhow::Result<Vec<(Id, &'static super::Task)>> {
let mut out: Vec<_> = super::run::ALL
.iter()
.filter(|task| {
if allowed_short_names.is_empty() {
true
} else {
allowed_short_names.iter().any(|allowed| task.short_name == allowed)
}
})
.map(|task| (0, task))
.collect();
for (id, task) in &mut out {
*id = self.con.query_row(
"INSERT INTO task (short_name, description) VALUES (?1, ?2) ON CONFLICT DO UPDATE SET short_name = short_name, description = ?2 RETURNING id",
[task.short_name, task.description],
|r| r.get(0),
)?;
}
Ok(out)
}
pub(crate) fn insert_run(
con: &rusqlite::Connection,
gitoxide_version: Id,
runner: Id,
task: Id,
repository: Id,
) -> anyhow::Result<Run> {
let insertion_time = std::time::UNIX_EPOCH.elapsed()?.as_secs();
let id = con.query_row("INSERT INTO run (gitoxide_version, runner, task, repository, insertion_time) VALUES (?1, ?2, ?3, ?4, ?5) RETURNING id", params![gitoxide_version, runner, task, repository, insertion_time], |r| r.get(0))?;
Ok(Run {
id,
duration: Default::default(),
error: None,
})
}
pub(crate) fn update_run(con: &rusqlite::Connection, run: Run) -> anyhow::Result<()> {
con.execute(
"UPDATE run SET duration = ?2, error = ?3 WHERE id = ?1",
params![run.id, run.duration.as_secs_f64(), run.error.as_deref()],
)?;
Ok(())
}
}