use serde::Deserialize;
use crate::types::{
DirectiveData, DirectiveWrapper, DocumentData, PluginError, PluginInput, PluginOp, PluginOutput,
};
use super::super::{NativePlugin, SynthPlugin};
/// Deepest recursion level `scan_documents` will descend to; past this depth it
/// records a warning and stops descending instead of recursing further.
const MAX_SCAN_DEPTH: usize = 32;
/// Plugin that scans configured directories for date-prefixed files and inserts
/// a `document` directive for each one not already declared in the input.
pub struct DocumentDiscoveryPlugin;
/// Name reported by `DocumentDiscoveryPlugin` through `NativePlugin::name`.
pub const DOCUMENT_DISCOVERY_NAME: &str = "document_discovery";
/// Plugin configuration, deserialized from the JSON string carried in
/// `PluginInput::config`.
#[derive(Debug, Deserialize)]
struct DocumentDiscoveryConfig {
/// Directory that relative paths of already-declared document directives are
/// joined onto before being canonicalized for duplicate detection.
base_dir: std::path::PathBuf,
/// Directories to scan; each is used both as the scan root and as the prefix
/// stripped from file paths when deriving the account name.
directories: Vec<String>,
}
/// Builds the JSON configuration string consumed by [`DocumentDiscoveryPlugin`].
///
/// The result is an object with two keys: `"base_dir"` (from `base_dir`) and
/// `"directories"` (from `directories`), matching the fields of
/// `DocumentDiscoveryConfig`.
///
/// # Panics
///
/// NOTE(review): `serde_json::json!` panics when an interpolated value fails to
/// serialize; a `base_dir` that is not valid UTF-8 presumably triggers this —
/// confirm against the serde `Path` implementation.
#[must_use]
pub fn document_discovery_config(base_dir: &std::path::Path, directories: &[String]) -> String {
    let payload = serde_json::json!({
        "base_dir": base_dir,
        "directories": directories,
    });
    payload.to_string()
}
impl NativePlugin for DocumentDiscoveryPlugin {
fn name(&self) -> &'static str {
DOCUMENT_DISCOVERY_NAME
}
fn description(&self) -> &'static str {
"Auto-discover documents from directories"
}
fn process(&self, input: PluginInput) -> PluginOutput {
use std::path::Path;
let Some(config_json) = input.config.as_deref() else {
return PluginOutput {
ops: (0..input.directives.len()).map(PluginOp::Keep).collect(),
errors: Vec::new(),
};
};
let config: DocumentDiscoveryConfig = match serde_json::from_str(config_json) {
Ok(c) => c,
Err(e) => {
return PluginOutput {
ops: (0..input.directives.len()).map(PluginOp::Keep).collect(),
errors: vec![PluginError::error(format!(
"document_discovery: invalid config JSON: {e}"
))],
};
}
};
if config.directories.is_empty() {
return PluginOutput {
ops: (0..input.directives.len()).map(PluginOp::Keep).collect(),
errors: Vec::new(),
};
}
let mut new_directives = Vec::new();
let mut errors = Vec::new();
let mut existing_docs: std::collections::HashSet<String> = std::collections::HashSet::new();
for wrapper in &input.directives {
if let DirectiveData::Document(doc) = &wrapper.data {
let doc_path = Path::new(&doc.path);
let resolved = if doc_path.is_absolute() {
doc_path.to_path_buf()
} else {
config.base_dir.join(doc_path)
};
let normalized = resolved
.canonicalize()
.map_or_else(|_| doc.path.clone(), |p| p.to_string_lossy().to_string());
existing_docs.insert(normalized);
}
}
for dir in &config.directories {
let dir_path = Path::new(dir);
if !dir_path.exists() {
continue;
}
if let Err(e) = scan_documents(
dir_path,
dir,
&existing_docs,
&mut new_directives,
&mut errors,
0, ) {
errors.push(PluginError::error(format!(
"Error scanning documents in {dir}: {e}"
)));
}
}
let mut ops: Vec<PluginOp> = (0..input.directives.len()).map(PluginOp::Keep).collect();
for w in new_directives {
ops.push(PluginOp::Insert(w));
}
PluginOutput { ops, errors }
}
}
// `SynthPlugin` is implemented with an empty body: no items are overridden here.
impl SynthPlugin for DocumentDiscoveryPlugin {}
/// Recursively walks `path` and appends a `document` directive for every
/// regular file whose name begins with a `YYYY-MM-DD` date.
///
/// * `path` – directory to read at this recursion level.
/// * `base_dir` – scan root; it is stripped from each file path, and the
///   remaining parent components joined with `:` form the account name. Files
///   directly inside `base_dir` (empty account) are ignored.
/// * `existing` – canonical paths of documents that are already declared;
///   matching files are skipped.
/// * `directives` – receives the synthesized directives.
/// * `errors` – receives a warning when `MAX_SCAN_DEPTH` is exceeded.
/// * `depth` – current recursion depth (callers pass `0`).
///
/// Symlinks and entries whose metadata cannot be read are skipped.
///
/// # Errors
///
/// Returns the I/O error from reading `path` or any nested directory, or from
/// iterating their entries; the scan stops at the first such error.
// NOTE(review): every parameter appears to be used outside the recursive call,
// so this `allow` may be stale — confirm with clippy before removing it.
#[allow(clippy::only_used_in_recursion)]
fn scan_documents(
    path: &std::path::Path,
    base_dir: &str,
    existing: &std::collections::HashSet<String>,
    directives: &mut Vec<DirectiveWrapper>,
    errors: &mut Vec<PluginError>,
    depth: usize,
) -> std::io::Result<()> {
    use std::fs;

    /// Returns the leading `YYYY-MM-DD` prefix of `file_name`, if it has one.
    ///
    /// `str::get` yields `None` when the name is shorter than ten bytes or when
    /// byte 10 is not a char boundary, so names with multi-byte characters near
    /// the start are rejected instead of panicking on a byte slice.
    fn leading_date(file_name: &str) -> Option<&str> {
        let prefix = file_name.get(..10)?;
        prefix
            .bytes()
            .enumerate()
            .all(|(index, byte)| match index {
                4 | 7 => byte == b'-',
                _ => byte.is_ascii_digit(),
            })
            .then_some(prefix)
    }

    if depth > MAX_SCAN_DEPTH {
        errors.push(PluginError::warning(format!(
            "Maximum directory depth ({MAX_SCAN_DEPTH}) exceeded at {}",
            path.display()
        )));
        return Ok(());
    }

    for entry in fs::read_dir(path)? {
        let entry = entry?;
        let entry_path = entry.path();

        // `symlink_metadata` does not follow links, so symlinks can be detected
        // and skipped rather than traversed.
        let Ok(metadata) = fs::symlink_metadata(&entry_path) else {
            continue;
        };
        if metadata.file_type().is_symlink() {
            continue;
        }

        if metadata.is_dir() {
            scan_documents(
                &entry_path,
                base_dir,
                existing,
                directives,
                errors,
                depth + 1,
            )?;
            continue;
        }
        if !metadata.is_file() {
            continue;
        }

        // Only files named `YYYY-MM-DD…` (valid UTF-8 names) are documents.
        let Some(date_str) = entry_path
            .file_name()
            .and_then(|name| name.to_str())
            .and_then(leading_date)
        else {
            continue;
        };

        // The directory components below `base_dir` spell the account name.
        let Ok(rel_path) = entry_path.strip_prefix(base_dir) else {
            continue;
        };
        let Some(parent) = rel_path.parent() else {
            continue;
        };
        let account = parent
            .components()
            .map(|component| component.as_os_str().to_string_lossy().into_owned())
            .collect::<Vec<_>>()
            .join(":");
        if account.is_empty() {
            continue;
        }

        let full_path = entry_path.to_string_lossy().to_string();
        // Compare by canonical path when available so an already-declared
        // document is recognized regardless of how its path was written.
        let canonical = entry_path.canonicalize().map_or_else(
            |_| full_path.clone(),
            |real| real.to_string_lossy().to_string(),
        );
        if existing.contains(&canonical) {
            continue;
        }

        directives.push(DirectiveWrapper {
            directive_type: "document".to_string(),
            date: date_str.to_string(),
            filename: None,
            lineno: None,
            data: DirectiveData::Document(DocumentData {
                account,
                path: full_path,
                tags: vec![],
                links: vec![],
                metadata: vec![],
            }),
        });
    }

    Ok(())
}