Skip to main content

rustledger_plugin/native/plugins/
document_discovery.rs

1//! Auto-discover documents from directories.
2
3use crate::types::{
4    DirectiveData, DirectiveWrapper, DocumentData, PluginError, PluginInput, PluginOutput,
5    sort_directives,
6};
7
8use super::super::NativePlugin;
9
/// Plugin that auto-discovers document files from configured directories.
///
/// Scans directories specified in `option "documents"` for files matching
/// the pattern: `{Account}/YYYY-MM-DD.description.*`
///
/// The account name is derived from the file's directory path relative to the
/// scanned directory, with the path components joined by `:`.
///
/// For example: `documents/Assets/Bank/Checking/2024-01-15.statement.pdf`
/// generates: `2024-01-15 document Assets:Bank:Checking "documents/Assets/Bank/Checking/2024-01-15.statement.pdf"`
///
/// Files whose path is already referenced by an existing `document` directive
/// are not added a second time.
#[derive(Debug, Clone)]
pub struct DocumentDiscoveryPlugin {
    /// Directories to scan for documents.
    pub directories: Vec<String>,
}
21
22impl DocumentDiscoveryPlugin {
23    /// Create a new plugin with the given directories.
24    pub const fn new(directories: Vec<String>) -> Self {
25        Self { directories }
26    }
27}
28
29impl NativePlugin for DocumentDiscoveryPlugin {
30    fn name(&self) -> &'static str {
31        "document_discovery"
32    }
33
34    fn description(&self) -> &'static str {
35        "Auto-discover documents from directories"
36    }
37
38    fn process(&self, input: PluginInput) -> PluginOutput {
39        use std::path::Path;
40
41        let mut new_directives = Vec::new();
42        let mut errors = Vec::new();
43
44        // Collect existing document paths to avoid duplicates
45        let mut existing_docs: std::collections::HashSet<String> = std::collections::HashSet::new();
46        for wrapper in &input.directives {
47            if let DirectiveData::Document(doc) = &wrapper.data {
48                existing_docs.insert(doc.path.clone());
49            }
50        }
51
52        // Scan each directory
53        for dir in &self.directories {
54            let dir_path = Path::new(dir);
55            if !dir_path.exists() {
56                continue;
57            }
58
59            if let Err(e) = scan_documents(
60                dir_path,
61                dir,
62                &existing_docs,
63                &mut new_directives,
64                &mut errors,
65            ) {
66                errors.push(PluginError::error(format!(
67                    "Error scanning documents in {dir}: {e}"
68                )));
69            }
70        }
71
72        // Add discovered documents to directives
73        let mut all_directives = input.directives;
74        all_directives.extend(new_directives);
75
76        // Sort using beancount's standard ordering
77        sort_directives(&mut all_directives);
78
79        PluginOutput {
80            directives: all_directives,
81            errors,
82        }
83    }
84}
85
86/// Recursively scan a directory for document files.
87#[allow(clippy::only_used_in_recursion)]
88fn scan_documents(
89    path: &std::path::Path,
90    base_dir: &str,
91    existing: &std::collections::HashSet<String>,
92    directives: &mut Vec<DirectiveWrapper>,
93    errors: &mut Vec<PluginError>,
94) -> std::io::Result<()> {
95    use std::fs;
96
97    for entry in fs::read_dir(path)? {
98        let entry = entry?;
99        let entry_path = entry.path();
100
101        if entry_path.is_dir() {
102            scan_documents(&entry_path, base_dir, existing, directives, errors)?;
103        } else if entry_path.is_file() {
104            // Try to parse filename as YYYY-MM-DD.description.ext
105            if let Some(file_name) = entry_path.file_name().and_then(|n| n.to_str()) {
106                if file_name.len() >= 10
107                    && file_name.chars().nth(4) == Some('-')
108                    && file_name.chars().nth(7) == Some('-')
109                {
110                    let date_str = &file_name[0..10];
111                    // Validate date format
112                    if date_str.chars().take(4).all(|c| c.is_ascii_digit())
113                        && date_str.chars().skip(5).take(2).all(|c| c.is_ascii_digit())
114                        && date_str.chars().skip(8).take(2).all(|c| c.is_ascii_digit())
115                    {
116                        // Extract account from path relative to base_dir
117                        if let Ok(rel_path) = entry_path.strip_prefix(base_dir) {
118                            if let Some(parent) = rel_path.parent() {
119                                let account = parent
120                                    .components()
121                                    .map(|c| c.as_os_str().to_string_lossy().to_string())
122                                    .collect::<Vec<_>>()
123                                    .join(":");
124
125                                if !account.is_empty() {
126                                    let full_path = entry_path.to_string_lossy().to_string();
127
128                                    // Skip if already exists
129                                    if existing.contains(&full_path) {
130                                        continue;
131                                    }
132
133                                    directives.push(DirectiveWrapper {
134                                        directive_type: "document".to_string(),
135                                        date: date_str.to_string(),
136                                        filename: None, // Plugin-generated
137                                        lineno: None,
138                                        data: DirectiveData::Document(DocumentData {
139                                            account,
140                                            path: full_path,
141                                            metadata: vec![],
142                                        }),
143                                    });
144                                }
145                            }
146                        }
147                    }
148                }
149            }
150        }
151    }
152
153    Ok(())
154}