use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use tokio::sync::RwLock;
use tracing::{debug, info, warn};
use super::backend::{FileBackend, StorageBackend};
use super::cache::DocumentCache;
use super::persistence::{PersistedDocument, load_document_from_bytes, save_document_to_bytes};
use crate::Error;
use crate::error::Result;
/// Backend key under which the serialized meta index (JSON map of id -> entry) is stored.
const META_KEY: &str = "meta";
/// Default capacity of the in-memory document cache (number of documents).
const DEFAULT_CACHE_SIZE: usize = 100;
/// One row of the workspace meta index: lightweight, serializable summary of a
/// stored document, kept in memory so listing/lookup does not touch the backend.
/// Field names are part of the persisted JSON schema — do not rename.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentMetaEntry {
/// Unique document id; also used to derive the backend storage key.
pub id: String,
/// Human-readable document name (from `DocumentMeta::name`).
pub doc_name: String,
/// Optional free-form description; absent entries deserialize to `None`.
#[serde(default)]
pub doc_description: Option<String>,
/// Source format tag (from `DocumentMeta::format`, e.g. "md").
pub doc_type: String,
/// Lossy string form of the original source path, if the document had one.
#[serde(default)]
pub path: Option<String>,
/// Page count; `None` when the document has no pages (omitted from JSON).
#[serde(skip_serializing_if = "Option::is_none")]
pub page_count: Option<usize>,
/// Line count carried over from the document metadata (omitted when `None`).
#[serde(skip_serializing_if = "Option::is_none")]
pub line_count: Option<usize>,
}
/// Tunable settings applied when opening a [`Workspace`].
#[derive(Debug, Clone)]
pub struct WorkspaceOptions {
/// Maximum number of documents held in the in-memory cache.
pub cache_size: usize,
}
impl Default for WorkspaceOptions {
fn default() -> Self {
Self {
cache_size: DEFAULT_CACHE_SIZE,
}
}
}
impl WorkspaceOptions {
pub fn new() -> Self {
Self::default()
}
pub fn with_cache_size(mut self, size: usize) -> Self {
self.cache_size = size;
self
}
}
/// Shared mutable state of a [`Workspace`], guarded by one `RwLock`.
struct WorkspaceInner {
/// Storage backend holding document bytes, the meta index, and the graph.
backend: Arc<dyn StorageBackend>,
/// Root directory when file-backed; `None` for custom backends.
root: Option<PathBuf>,
/// In-memory index of all stored documents, keyed by document id.
meta_index: HashMap<String, DocumentMetaEntry>,
/// Bounded cache of deserialized documents.
cache: DocumentCache,
/// Memoized document graph; reset to `None` whenever contents change.
document_graph: Option<crate::graph::DocumentGraph>,
}
/// Async handle to a document workspace. Cheap to clone: all clones share the
/// same inner state through `Arc<RwLock<…>>`.
#[derive(Clone)]
pub struct Workspace {
inner: Arc<RwLock<WorkspaceInner>>,
}
impl std::fmt::Debug for Workspace {
    /// Opaque `Debug`: inner state sits behind an async lock, so only the
    /// type name is printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("Workspace");
        builder.finish()
    }
}
impl Workspace {
/// Open a workspace over an arbitrary storage backend with default options.
pub async fn with_backend(backend: Arc<dyn StorageBackend>) -> Result<Self> {
    let options = WorkspaceOptions::default();
    Self::with_backend_and_options(backend, options).await
}
pub async fn with_backend_and_options(
backend: Arc<dyn StorageBackend>,
options: WorkspaceOptions,
) -> Result<Self> {
let mut inner = WorkspaceInner {
backend,
root: None,
meta_index: HashMap::new(),
cache: DocumentCache::with_capacity(options.cache_size),
document_graph: None,
};
Self::load_meta_index(&mut inner)?;
Ok(Self {
inner: Arc::new(RwLock::new(inner)),
})
}
/// Open (or create) a file-backed workspace at `path` with default options.
pub async fn new(path: impl Into<PathBuf>) -> Result<Self> {
    Self::with_options(path, WorkspaceOptions::new()).await
}
pub async fn with_cache_size(path: impl Into<PathBuf>, cache_size: usize) -> Result<Self> {
Self::with_options(
path,
WorkspaceOptions {
cache_size,
..Default::default()
},
)
.await
}
pub async fn with_options(path: impl Into<PathBuf>, options: WorkspaceOptions) -> Result<Self> {
let root = path.into();
let backend = Arc::new(FileBackend::new(&root)?);
let mut inner = WorkspaceInner {
backend,
root: Some(root),
meta_index: HashMap::new(),
cache: DocumentCache::with_capacity(options.cache_size),
document_graph: None,
};
Self::load_meta_index(&mut inner)?;
Ok(Self {
inner: Arc::new(RwLock::new(inner)),
})
}
/// Root directory of the workspace, if it is file-backed.
pub async fn path(&self) -> Option<PathBuf> {
    self.inner.read().await.root.clone()
}

/// Ids of every document tracked by the meta index (unordered).
pub async fn list_documents(&self) -> Vec<String> {
    self.inner.read().await.meta_index.keys().cloned().collect()
}

/// Clone of the index entry for `id`, or `None` when unknown.
pub async fn get_meta(&self, id: &str) -> Option<DocumentMetaEntry> {
    self.inner.read().await.meta_index.get(id).cloned()
}

/// Whether a document with this id is tracked by the index.
pub async fn contains(&self, id: &str) -> bool {
    self.inner.read().await.meta_index.contains_key(id)
}
/// Persist `doc` to the backend and register it in the meta index.
///
/// Any cached copy under the same id is evicted so later loads see the new
/// bytes, and the memoized document graph is invalidated because the
/// workspace contents changed.
pub async fn add(&self, doc: &PersistedDocument) -> Result<()> {
    let mut state = self.inner.write().await;
    let doc_id = doc.meta.id.clone();
    let bytes = save_document_to_bytes(doc)?;
    state.backend.put(&Self::doc_key(&doc_id), &bytes)?;
    let source_path = doc
        .meta
        .source_path
        .as_ref()
        .map(|p| p.to_string_lossy().to_string());
    // `None` rather than `Some(0)` for page-less documents, matching the
    // skip_serializing_if behavior of the index entry.
    let page_count = (!doc.pages.is_empty()).then(|| doc.pages.len());
    let entry = DocumentMetaEntry {
        id: doc_id.clone(),
        doc_name: doc.meta.name.clone(),
        doc_description: doc.meta.description.clone(),
        doc_type: doc.meta.format.clone(),
        path: source_path,
        page_count,
        line_count: doc.meta.line_count,
    };
    state.meta_index.insert(doc_id.clone(), entry);
    Self::save_meta_index(&state)?;
    // Best-effort eviction; a cache miss here is harmless.
    let _ = state.cache.remove(&doc_id);
    info!("Saved document {} to async workspace", doc_id);
    state.document_graph = None;
    Ok(())
}
/// Load a document without populating the cache.
///
/// Returns `Ok(None)` when the id is unknown. A cache hit is returned
/// directly; otherwise the bytes come from the backend. Unlike
/// [`Workspace::load_and_cache`], the result is NOT inserted into the cache.
///
/// # Errors
/// Propagates backend, cache, and deserialization errors.
pub async fn load(&self, id: &str) -> Result<Option<PersistedDocument>> {
    // Hold ONE read lock across the index check, cache probe, and backend
    // read. Previously the lock was dropped and re-acquired in between,
    // which was a wasted acquisition and opened a window where a concurrent
    // `remove` could make the index check and the backend read disagree.
    let inner = self.inner.read().await;
    if !inner.meta_index.contains_key(id) {
        return Ok(None);
    }
    if let Some(cached) = inner.cache.get(id)? {
        debug!("Cache hit for document {}", id);
        return Ok(Some(cached));
    }
    let key = Self::doc_key(id);
    match inner.backend.get(&key)? {
        Some(bytes) => {
            let doc = load_document_from_bytes(&bytes)?;
            debug!("Loaded document {} from backend", id);
            Ok(Some(doc))
        }
        None => {
            // Index and backend are out of sync; report missing but log it.
            warn!("Document {} in meta index but not in backend", id);
            Ok(None)
        }
    }
}
/// Load a document and insert it into the cache on a miss.
///
/// Fast path takes only a read lock; on a cache miss the write lock is
/// acquired to fetch from the backend and populate the cache.
///
/// # Errors
/// Propagates backend, cache, and deserialization errors.
pub async fn load_and_cache(&self, id: &str) -> Result<Option<PersistedDocument>> {
    // Fast path under the read lock.
    {
        let inner = self.inner.read().await;
        if !inner.meta_index.contains_key(id) {
            return Ok(None);
        }
        if let Some(cached) = inner.cache.get(id)? {
            debug!("Cache hit for document {}", id);
            return Ok(Some(cached));
        }
    }
    let inner = self.inner.write().await;
    // Double-check under the write lock: while we waited, another task may
    // have cached the document (avoid a redundant backend read) or removed
    // it (avoid resurrecting a deleted id). The original code skipped this
    // re-validation.
    if !inner.meta_index.contains_key(id) {
        return Ok(None);
    }
    if let Some(cached) = inner.cache.get(id)? {
        debug!("Cache hit for document {}", id);
        return Ok(Some(cached));
    }
    let key = Self::doc_key(id);
    match inner.backend.get(&key)? {
        Some(bytes) => {
            let doc = load_document_from_bytes(&bytes)?;
            inner.cache.put(id.to_string(), doc.clone())?;
            debug!("Loaded and cached document {}", id);
            Ok(Some(doc))
        }
        None => {
            warn!("Document {} in meta index but not in backend", id);
            Ok(None)
        }
    }
}
/// Delete a document from the backend, meta index, and cache.
///
/// Returns `Ok(false)` when the id is unknown; `Ok(true)` after a
/// successful removal. The backend delete happens first so a failure
/// leaves the index untouched. Invalidates the memoized document graph.
pub async fn remove(&self, id: &str) -> Result<bool> {
    let mut state = self.inner.write().await;
    if !state.meta_index.contains_key(id) {
        return Ok(false);
    }
    state.backend.delete(&Self::doc_key(id))?;
    state.meta_index.remove(id);
    // Best-effort cache eviction.
    let _ = state.cache.remove(id);
    Self::save_meta_index(&state)?;
    info!("Removed document {} from async workspace", id);
    state.document_graph = None;
    Ok(true)
}
/// Number of documents tracked by the meta index.
pub async fn len(&self) -> usize {
    self.inner.read().await.meta_index.len()
}

/// True when the workspace tracks no documents.
pub async fn is_empty(&self) -> bool {
    self.inner.read().await.meta_index.is_empty()
}
/// Find the id of the document whose recorded source path equals `path`.
///
/// Comparison uses the lossy string form, matching how paths are stored in
/// `DocumentMetaEntry::path`. Uses `.values()` + `find` instead of iterating
/// key-value pairs and discarding the key (clippy `for_kv_map`), and keeps
/// the target as a `Cow` to avoid an allocation when the path is valid UTF-8.
pub async fn find_by_source_path(&self, path: &std::path::Path) -> Option<String> {
    let target = path.to_string_lossy();
    let inner = self.inner.read().await;
    inner
        .meta_index
        .values()
        .find(|entry| entry.path.as_deref() == Some(target.as_ref()))
        .map(|entry| entry.id.clone())
}
/// Number of documents currently held in the cache.
pub async fn cache_len(&self) -> usize {
    self.inner.read().await.cache.len()
}

/// Cache fill ratio as reported by the cache itself.
pub async fn cache_utilization(&self) -> f64 {
    self.inner.read().await.cache.utilization()
}

/// Snapshot of cache hit/miss statistics.
pub async fn cache_stats(&self) -> super::cache::CacheStats {
    self.inner.read().await.cache.stats()
}
/// Evict every cached document. The backend and meta index are untouched.
pub async fn clear_cache(&self) -> Result<()> {
    let state = self.inner.write().await;
    state.cache.clear()?;
    debug!("Cleared async document cache");
    Ok(())
}
/// Backend key under which the serialized document graph is persisted.
/// (`'static` is implied for string constants — clippy `redundant_static_lifetimes`.)
const GRAPH_KEY: &str = "_graph";
/// Return the document graph, loading it from the backend on first access.
///
/// Fast path: clone of the memoized graph under a read lock. On a miss the
/// write lock is taken, the persisted graph (if any) is deserialized, and —
/// unlike before — the result is memoized into `document_graph`, so the JSON
/// is not re-deserialized on every call after a restart.
///
/// # Errors
/// Propagates backend errors; deserialization failures become
/// `Error::Serialization`.
pub async fn get_graph(&self) -> Result<Option<crate::graph::DocumentGraph>> {
    {
        let inner = self.inner.read().await;
        if inner.document_graph.is_some() {
            return Ok(inner.document_graph.clone());
        }
    }
    let mut inner = self.inner.write().await;
    // Double-check: another task may have populated it while we waited.
    if inner.document_graph.is_some() {
        return Ok(inner.document_graph.clone());
    }
    match inner.backend.get(Self::GRAPH_KEY)? {
        Some(bytes) => {
            let graph: crate::graph::DocumentGraph =
                serde_json::from_slice(&bytes).map_err(|e| {
                    crate::Error::Serialization(format!("Failed to deserialize graph: {}", e))
                })?;
            debug!("Loaded document graph from backend");
            inner.document_graph = Some(graph.clone());
            Ok(Some(graph))
        }
        None => Ok(None),
    }
}
/// Persist `graph` to the backend and memoize it in memory.
///
/// Serialization now happens BEFORE the write lock is taken, so readers are
/// not blocked while the graph is being encoded (the original serialized
/// inside the critical section).
///
/// # Errors
/// Serialization failures become `Error::Serialization`; backend put errors
/// are propagated.
pub async fn set_graph(&self, graph: &crate::graph::DocumentGraph) -> Result<()> {
    let bytes = serde_json::to_vec(graph).map_err(|e| {
        crate::Error::Serialization(format!("Failed to serialize graph: {}", e))
    })?;
    let mut inner = self.inner.write().await;
    inner.backend.put(Self::GRAPH_KEY, &bytes)?;
    inner.document_graph = Some(graph.clone());
    info!(
        "Persisted document graph ({} nodes, {} edges)",
        graph.node_count(),
        graph.edge_count()
    );
    Ok(())
}
/// Drop the memoized graph and best-effort delete the persisted copy.
pub async fn invalidate_graph(&self) -> Result<()> {
    let mut state = self.inner.write().await;
    state.document_graph = None;
    // Deliberately ignore the delete result: a missing persisted graph is
    // not an error for invalidation.
    state.backend.delete(Self::GRAPH_KEY).ok();
    debug!("Invalidated document graph cache");
    Ok(())
}
/// Namespaced backend key for a document blob: `doc:<id>`.
fn doc_key(id: &str) -> String {
    let mut key = String::with_capacity("doc:".len() + id.len());
    key.push_str("doc:");
    key.push_str(id);
    key
}
/// Populate `inner.meta_index` from the persisted index, or rebuild it by
/// scanning the backend when no index blob exists.
fn load_meta_index(inner: &mut WorkspaceInner) -> Result<()> {
    match inner.backend.get(META_KEY)? {
        None => Self::rebuild_meta_index(inner),
        Some(bytes) => {
            let parsed: HashMap<String, DocumentMetaEntry> = serde_json::from_slice(&bytes)
                .map_err(|e| Error::Parse(format!("Failed to parse meta index: {}", e)))?;
            inner.meta_index = parsed;
            info!(
                "Loaded {} document(s) from async workspace index",
                inner.meta_index.len()
            );
            Ok(())
        }
    }
}
/// Serialize the meta index as pretty-printed JSON and write it to the backend.
fn save_meta_index(inner: &WorkspaceInner) -> Result<()> {
    let serialized = serde_json::to_vec_pretty(&inner.meta_index)
        .map_err(|e| Error::Parse(format!("Failed to serialize meta index: {}", e)))?;
    inner.backend.put(META_KEY, &serialized)?;
    Ok(())
}
fn rebuild_meta_index(inner: &mut WorkspaceInner) -> Result<()> {
let keys = inner.backend.keys()?;
let doc_keys: Vec<_> = keys.iter().filter(|k| k.starts_with("doc:")).collect();
for key in doc_keys {
if let Some(bytes) = inner.backend.get(key)? {
if let Ok(doc) = load_document_from_bytes(&bytes) {
let doc_id = doc.meta.id.clone();
let meta_entry = DocumentMetaEntry {
id: doc_id.clone(),
doc_name: doc.meta.name,
doc_description: doc.meta.description,
doc_type: doc.meta.format,
path: doc
.meta
.source_path
.as_ref()
.map(|p| p.to_string_lossy().to_string()),
page_count: if doc.pages.is_empty() {
None
} else {
Some(doc.pages.len())
},
line_count: doc.meta.line_count,
};
inner.meta_index.insert(doc_id, meta_entry);
}
}
}
if !inner.meta_index.is_empty() {
Self::save_meta_index(inner)?;
info!(
"Rebuilt async index from {} document(s)",
inner.meta_index.len()
);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::document::DocumentTree;
// Builds a minimal markdown-format document for exercising workspace
// round-trips.
// NOTE(review): no #[test] functions exist in this module yet, so this
// helper is currently unused and will trip dead_code warnings under
// `cargo test` — add tests or confirm they live elsewhere.
fn create_test_doc(id: &str) -> PersistedDocument {
let meta = super::super::persistence::DocumentMeta::new(id, "Test Doc", "md");
let tree = DocumentTree::new("Root", "Content");
PersistedDocument::new(meta, tree)
}
}