use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use super::paths::slugify_source_id;
/// Category of a raw item.
///
/// Each variant maps to exactly one storage subdirectory name, returned by
/// `RawKind::as_dir`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum RawKind {
/// Stored under the `emails` directory.
Email,
/// Stored under the `chats` directory.
Chat,
/// Stored under the `documents` directory.
Document,
/// Stored under the `contacts` directory.
Contact,
/// Stored under the `posts` directory.
Post,
/// Stored under the `commits` directory.
Commit,
/// Stored under the `issues` directory.
Issue,
/// Stored under the `prs` directory.
PullRequest,
}
impl RawKind {
pub const fn as_dir(&self) -> &'static str {
match self {
Self::Email => "emails",
Self::Chat => "chats",
Self::Document => "documents",
Self::Contact => "contacts",
Self::Post => "posts",
Self::Commit => "commits",
Self::Issue => "issues",
Self::PullRequest => "prs",
}
}
}
/// One raw item to be persisted by `write_raw_items`.
///
/// The string fields are borrowed for `'a`; the struct owns no text itself.
pub struct RawItem<'a> {
/// Identifier embedded in the file name after passing through `sanitize_uid`.
pub uid: &'a str,
/// Creation time used as the file-name prefix (presumably Unix epoch
/// milliseconds, per the name); negative values are clamped to 0 when the
/// file name is built.
pub created_at_ms: i64,
/// File contents; written to disk as-is (the string's bytes).
pub markdown: &'a str,
/// Selects the per-kind subdirectory the file is placed in.
pub kind: RawKind,
}
pub fn write_raw_items(
content_root: &Path,
source_id: &str,
items: &[RawItem<'_>],
) -> Result<usize> {
if items.is_empty() {
return Ok(0);
}
let mut written = 0usize;
for item in items {
let dir = raw_kind_dir(content_root, source_id, item.kind);
fs::create_dir_all(&dir).with_context(|| format!("create raw dir {}", dir.display()))?;
let filename = build_filename(item.created_at_ms, item.uid);
let path = dir.join(&filename);
write_atomic(&path, item.markdown.as_bytes())
.with_context(|| format!("write raw file {}", path.display()))?;
written += 1;
}
Ok(written)
}
/// Returns the directory holding all raw items of one source:
/// `<content_root>/raw/<slug>`, where the slug is `slugify_source_id(source_id)`.
///
/// Only builds the path; nothing is created on disk.
pub fn raw_source_dir(content_root: &Path, source_id: &str) -> PathBuf {
    let mut dir = content_root.join("raw");
    dir.push(slugify_source_id(source_id));
    dir
}
/// Returns the directory for one kind of raw item within a source:
/// the path from `raw_source_dir` extended with `kind.as_dir()`.
///
/// Only builds the path; nothing is created on disk.
pub fn raw_kind_dir(content_root: &Path, source_id: &str, kind: RawKind) -> PathBuf {
    let mut dir = raw_source_dir(content_root, source_id);
    dir.push(kind.as_dir());
    dir
}
/// Builds the `/`-separated relative path of a raw item:
/// `raw/<source slug>/<kind dir>/<file name>`.
///
/// The components are the same ones `raw_kind_dir` and `build_filename`
/// produce, but joined with forward slashes into a plain `String`.
pub fn raw_rel_path(source_id: &str, kind: RawKind, created_at_ms: i64, uid: &str) -> String {
    format!(
        "raw/{}/{}/{}",
        slugify_source_id(source_id),
        kind.as_dir(),
        build_filename(created_at_ms, uid)
    )
}
/// Builds the file name `<timestamp>_<sanitized uid>.md` for a raw item.
///
/// Negative timestamps are clamped to 0, so the name never starts with `-`.
fn build_filename(created_at_ms: i64, uid: &str) -> String {
    let ts = if created_at_ms < 0 { 0 } else { created_at_ms };
    let uid = sanitize_uid(uid);
    format!("{ts}_{uid}.md")
}
/// Makes `uid` safe to embed in a single file name.
///
/// Each of the following characters is replaced by `-`:
///
/// * path separators and other reserved punctuation: `\ / : * ? " < > |`
/// * the space character
/// * ASCII control characters (`0x00..=0x1F` and `0x7F`) — an interior NUL
///   makes file creation fail, and the C0 range is not allowed in Windows
///   file names
///
/// Every other character, including non-ASCII text, is kept unchanged.
/// The mapping is one-to-one, so the result has as many characters as the
/// input. An empty `uid` yields `"unknown"`.
pub(crate) fn sanitize_uid(uid: &str) -> String {
    if uid.is_empty() {
        return "unknown".into();
    }
    uid.chars()
        .map(|c| match c {
            '\\' | '/' | ':' | '*' | '?' | '"' | '<' | '>' | '|' | ' ' => '-',
            c if c.is_ascii_control() => '-',
            other => other,
        })
        .collect()
}
/// Writes `bytes` to `path` so that readers see either the old contents or
/// the complete new contents.
///
/// The data is first written and fsynced to a hidden temporary file in the
/// same directory as `path`, then renamed over `path`. Afterwards the parent
/// directory is fsynced on a best-effort basis; failures there are ignored,
/// because opening a directory as a file is not supported on every platform.
///
/// If any step up to and including the rename fails, the temporary file is
/// removed (best effort) so failed writes do not leave `.tmp_raw_*` files
/// behind.
///
/// # Errors
///
/// Fails when `path` has no parent component, or when creating, writing,
/// fsyncing or renaming the temporary file fails.
fn write_atomic(path: &Path, bytes: &[u8]) -> Result<()> {
    // Per-process sequence number. Process id plus wall-clock time alone can
    // repeat when two threads stage a file within one clock tick, and
    // `File::create` would then truncate the other writer's staging file.
    static TMP_SEQ: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

    let parent = path
        .parent()
        .ok_or_else(|| anyhow::anyhow!("path has no parent: {}", path.display()))?;
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_nanos();
    // Only uniqueness matters here, so relaxed ordering is sufficient.
    let seq = TMP_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let tmp = parent.join(format!(
        ".tmp_raw_{}_{}_{}.md",
        std::process::id(),
        nanos,
        seq
    ));

    // Stage and publish inside a closure so that a single cleanup path covers
    // every failure; the file handle is closed before the closure returns.
    let staged = (|| -> Result<()> {
        let mut f =
            fs::File::create(&tmp).with_context(|| format!("create tmp {}", tmp.display()))?;
        f.write_all(bytes)
            .with_context(|| format!("write tmp {}", tmp.display()))?;
        f.sync_all()
            .with_context(|| format!("fsync tmp {}", tmp.display()))?;
        drop(f);
        fs::rename(&tmp, path)
            .with_context(|| format!("rename {} -> {}", tmp.display(), path.display()))
    })();
    if staged.is_err() {
        // Best effort: the original error is the one worth reporting.
        let _ = fs::remove_file(&tmp);
    }
    staged?;

    // Persist the rename itself; deliberately best-effort.
    if let Ok(dir_handle) = fs::File::open(parent) {
        let _ = dir_handle.sync_all();
    }
    Ok(())
}
/// Turns an e-mail address into a lowercase, dash-separated slug.
///
/// The input is trimmed and lowercased, then processed one character at a
/// time:
///
/// * `@` becomes the word `at` and `.` becomes the word `dot`, each set off
///   by dashes;
/// * ASCII letters and digits are copied through;
/// * any other character collapses into a single `-` separator.
///
/// Leading and trailing dashes are stripped. If nothing remains, the result
/// is `"unknown"`.
pub fn slug_account_email(email: &str) -> String {
    let normalized = email.trim().to_lowercase();
    let mut slug = String::with_capacity(normalized.len() + 8);
    // True while the slug is empty or already ends in '-', i.e. no separator
    // needs to be inserted before the next word.
    let mut at_boundary = true;

    for ch in normalized.chars() {
        let word = match ch {
            '@' => Some("at-"),
            '.' => Some("dot-"),
            _ => None,
        };
        if let Some(word) = word {
            if !at_boundary {
                slug.push('-');
            }
            slug.push_str(word);
            at_boundary = true;
        } else if ch.is_ascii_alphanumeric() {
            slug.push(ch);
            at_boundary = false;
        } else if !at_boundary {
            slug.push('-');
            at_boundary = true;
        }
    }

    match slug.trim_matches('-') {
        "" => String::from("unknown"),
        body => body.to_string(),
    }
}
// Unit tests are compiled only for test builds and are loaded from the file
// named by the `#[path]` attribute rather than the default module location.
#[cfg(test)]
#[path = "raw_tests.rs"]
mod tests;