use std::fmt;
use std::path::{Path, PathBuf};
/// Selects one of three storage modes, each with a fixed lowercase name.
///
/// The name is what [`StorageMode::as_str`] returns, what `Display` prints,
/// and what `FromStr` accepts.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum StorageMode {
    /// Named `"memory"`.
    Memory,
    /// Named `"mapped"`.
    Mapped,
    /// Named `"disk"`.
    Disk,
}

impl StorageMode {
    /// Returns the lowercase name of this mode as a static string.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Disk => "disk",
            Self::Mapped => "mapped",
            Self::Memory => "memory",
        }
    }
}

impl fmt::Display for StorageMode {
    /// Writes exactly the text returned by [`StorageMode::as_str`].
    ///
    /// The name is written straight to the formatter, so width and alignment
    /// flags in the format spec are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = self.as_str();
        f.write_str(label)
    }
}

impl std::str::FromStr for StorageMode {
    type Err = String;

    /// Parses the exact (case-sensitive) name of a mode.
    ///
    /// # Errors
    ///
    /// Returns a message quoting the rejected input and listing the accepted
    /// names when `s` is not one of `memory`, `mapped` or `disk`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Looking the name up through `as_str` keeps parsing and printing
        // driven by the same table of names.
        const CANDIDATES: [StorageMode; 3] = [
            StorageMode::Memory,
            StorageMode::Mapped,
            StorageMode::Disk,
        ];

        let other = s;
        CANDIDATES
            .iter()
            .copied()
            .find(|candidate| candidate.as_str() == other)
            .ok_or_else(|| {
                format!(
                    "unknown storage mode '{other}'; expected memory|mapped|disk"
                )
            })
    }
}
/// Computes every path used beneath a single root directory.
///
/// Layout, as produced by the accessor methods:
///
/// ```text
/// <root>/
///   raw/
///     index/master.<year>_QTR<quarter>.idx
///     submissions/submissions.zip
///     insider/
///     form13f/
///     financials/
///     filings/
///     company_tickers.json
///     raw_manifest.json
///   processed/
///     <name>.csv
///     processed_manifest.json
///   graph/<mode>/
///     sec.kgl
///     graph_manifest.json
/// ```
///
/// Apart from [`Workdir::graph_exists`] and [`Workdir::ensure_dirs`], the
/// methods only build paths and never touch the filesystem.
#[derive(Debug, Clone)]
pub struct Workdir {
    root: PathBuf,
}

impl Workdir {
    /// Wraps `root` without checking that it exists.
    pub fn new(root: impl Into<PathBuf>) -> Self {
        let root = root.into();
        Self { root }
    }

    /// The root directory this layout is anchored at.
    pub fn root(&self) -> &Path {
        self.root.as_path()
    }

    /// Joins `leaf` onto the `raw` directory.
    fn raw_child(&self, leaf: &str) -> PathBuf {
        let mut path = self.raw_dir();
        path.push(leaf);
        path
    }

    /// `<root>/raw`.
    pub fn raw_dir(&self) -> PathBuf {
        let mut path = self.root.clone();
        path.push("raw");
        path
    }

    /// `<root>/raw/index`.
    pub fn raw_index_dir(&self) -> PathBuf {
        self.raw_child("index")
    }

    /// `<root>/raw/index/master.<year>_QTR<quarter>.idx`.
    pub fn raw_master_idx(&self, year: u16, quarter: u8) -> PathBuf {
        let file_name = format!("master.{year}_QTR{quarter}.idx");
        let mut path = self.raw_index_dir();
        path.push(file_name);
        path
    }

    /// `<root>/raw/submissions`.
    pub fn raw_submissions_dir(&self) -> PathBuf {
        self.raw_child("submissions")
    }

    /// `<root>/raw/submissions/submissions.zip`.
    pub fn raw_submissions_zip(&self) -> PathBuf {
        let mut path = self.raw_submissions_dir();
        path.push("submissions.zip");
        path
    }

    /// `<root>/raw/insider`.
    pub fn raw_insider_dir(&self) -> PathBuf {
        self.raw_child("insider")
    }

    /// `<root>/raw/form13f`.
    pub fn raw_form13f_dir(&self) -> PathBuf {
        self.raw_child("form13f")
    }

    /// `<root>/raw/financials`.
    pub fn raw_financials_dir(&self) -> PathBuf {
        self.raw_child("financials")
    }

    /// `<root>/raw/filings`.
    pub fn raw_filings_dir(&self) -> PathBuf {
        self.raw_child("filings")
    }

    /// `<root>/raw/company_tickers.json`.
    pub fn raw_company_tickers_json(&self) -> PathBuf {
        self.raw_child("company_tickers.json")
    }

    /// `<root>/raw/raw_manifest.json`.
    pub fn raw_manifest(&self) -> PathBuf {
        self.raw_child("raw_manifest.json")
    }

    /// `<root>/processed`.
    pub fn processed_dir(&self) -> PathBuf {
        let mut path = self.root.clone();
        path.push("processed");
        path
    }

    /// `<root>/processed/<name>.csv`; `name` is given without the extension.
    pub fn processed_csv(&self, name: &str) -> PathBuf {
        let file_name = format!("{name}.csv");
        let mut path = self.processed_dir();
        path.push(file_name);
        path
    }

    /// `<root>/processed/processed_manifest.json`.
    pub fn processed_manifest(&self) -> PathBuf {
        let mut path = self.processed_dir();
        path.push("processed_manifest.json");
        path
    }

    /// `<root>/graph/<mode>`, where `<mode>` is `mode.as_str()`.
    pub fn graph_dir(&self, mode: StorageMode) -> PathBuf {
        let mut path = self.root.join("graph");
        path.push(mode.as_str());
        path
    }

    /// `<root>/graph/<mode>/sec.kgl`.
    pub fn graph_kgl(&self, mode: StorageMode) -> PathBuf {
        let mut path = self.graph_dir(mode);
        path.push("sec.kgl");
        path
    }

    /// `<root>/graph/<mode>/graph_manifest.json`.
    pub fn graph_manifest(&self, mode: StorageMode) -> PathBuf {
        let mut path = self.graph_dir(mode);
        path.push("graph_manifest.json");
        path
    }

    /// Reports whether the marker file for `mode` is present on disk.
    ///
    /// The marker is `graph_manifest.json` for [`StorageMode::Disk`] and
    /// `sec.kgl` for the other two modes; it must be a regular file.
    pub fn graph_exists(&self, mode: StorageMode) -> bool {
        let marker = match mode {
            StorageMode::Disk => self.graph_manifest(mode),
            StorageMode::Memory | StorageMode::Mapped => self.graph_kgl(mode),
        };
        marker.is_file()
    }

    /// Creates the directory skeleton, including missing parents.
    ///
    /// The six `raw/*` subdirectories and `processed/` are always created.
    /// The `graph/<mode>` directory is created only when `mode` is `Some`.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first I/O error reported by
    /// `std::fs::create_dir_all`.
    pub fn ensure_dirs(&self, mode: Option<StorageMode>) -> std::io::Result<()> {
        let always_present = [
            self.raw_index_dir(),
            self.raw_submissions_dir(),
            self.raw_insider_dir(),
            self.raw_form13f_dir(),
            self.raw_financials_dir(),
            self.raw_filings_dir(),
            self.processed_dir(),
        ];
        for dir in &always_present {
            std::fs::create_dir_all(dir)?;
        }

        match mode {
            Some(selected) => std::fs::create_dir_all(self.graph_dir(selected)),
            None => Ok(()),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Scratch directory that is removed when the guard goes out of scope,
    /// so a failing assertion does not leave files behind in the temp dir.
    struct ScratchDir(PathBuf);

    impl ScratchDir {
        /// Location of the scratch directory.
        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best-effort cleanup: a leftover directory must not fail a test.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// Creates a fresh, empty scratch directory under the system temp dir.
    ///
    /// Tests in one binary run on parallel threads of the same process, so
    /// the process id plus a timestamp is not enough to keep names apart on
    /// clocks with coarse resolution. The atomic sequence number makes every
    /// call within the process yield a distinct name.
    fn tempdir() -> ScratchDir {
        static NEXT_ID: AtomicUsize = AtomicUsize::new(0);

        let seq = NEXT_ID.fetch_add(1, Ordering::Relaxed);
        // The timestamp only helps tell runs apart; fall back to 0 rather
        // than panic if the clock reads earlier than the epoch.
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|elapsed| elapsed.as_nanos())
            .unwrap_or(0);
        let dir = std::env::temp_dir().join(format!(
            "kglite-sec-test-{}-{}-{}",
            std::process::id(),
            nanos,
            seq
        ));
        std::fs::create_dir_all(&dir).unwrap();
        ScratchDir(dir)
    }

    #[test]
    fn paths_are_well_formed() {
        let w = Workdir::new("/tmp/sec");
        assert_eq!(w.raw_dir(), Path::new("/tmp/sec/raw"));
        assert_eq!(
            w.raw_master_idx(2024, 4),
            Path::new("/tmp/sec/raw/index/master.2024_QTR4.idx")
        );
        assert_eq!(
            w.raw_submissions_zip(),
            Path::new("/tmp/sec/raw/submissions/submissions.zip")
        );
        assert_eq!(
            w.processed_csv("company"),
            Path::new("/tmp/sec/processed/company.csv")
        );
        assert_eq!(
            w.graph_kgl(StorageMode::Mapped),
            Path::new("/tmp/sec/graph/mapped/sec.kgl")
        );
        assert_eq!(
            w.graph_dir(StorageMode::Disk),
            Path::new("/tmp/sec/graph/disk")
        );
    }

    #[test]
    fn storage_mode_roundtrip() {
        for m in [StorageMode::Memory, StorageMode::Mapped, StorageMode::Disk] {
            let parsed: StorageMode = m.as_str().parse().unwrap();
            assert_eq!(parsed, m);
        }
        assert!("bogus".parse::<StorageMode>().is_err());
    }

    #[test]
    fn ensure_dirs_idempotent() {
        let tmp = tempdir();
        let w = Workdir::new(tmp.path());
        w.ensure_dirs(Some(StorageMode::Mapped)).unwrap();
        // A second call over an existing tree must succeed as well.
        w.ensure_dirs(Some(StorageMode::Mapped)).unwrap();
        assert!(w.raw_index_dir().is_dir());
        assert!(w.processed_dir().is_dir());
        assert!(w.graph_dir(StorageMode::Mapped).is_dir());
    }

    #[test]
    fn graph_exists_false_for_empty_workdir() {
        let tmp = tempdir();
        let w = Workdir::new(tmp.path());
        assert!(!w.graph_exists(StorageMode::Memory));
        assert!(!w.graph_exists(StorageMode::Mapped));
        assert!(!w.graph_exists(StorageMode::Disk));
    }

    #[test]
    fn graph_exists_tracks_marker_file() {
        let tmp = tempdir();
        let w = Workdir::new(tmp.path());
        w.ensure_dirs(Some(StorageMode::Mapped)).unwrap();
        std::fs::write(w.graph_kgl(StorageMode::Mapped), b"").unwrap();
        // Only the mode whose marker file was written reports a graph.
        assert!(w.graph_exists(StorageMode::Mapped));
        assert!(!w.graph_exists(StorageMode::Memory));
        assert!(!w.graph_exists(StorageMode::Disk));
    }
}