Skip to main content

rustledger_importer/
lib.rs

//! Import framework for rustledger
//!
//! This crate provides the infrastructure for extracting transactions from
//! bank statements, credit card statements, and other financial documents.
//!
//! # Overview
//!
//! The import system is modeled after Python beancount's bean-extract. It uses
//! a trait-based approach where each importer implements the [`Importer`] trait.
//!
//! # Example
//!
//! ```rust,no_run
//! use rustledger_importer::{Importer, ImporterConfig, extract_from_file};
//! use rustledger_core::Directive;
//! use std::path::Path;
//!
//! // Create a CSV importer configuration. `build()` is fallible, so the
//! // result has to be unwrapped before it can be used as a configuration.
//! let config = ImporterConfig::csv()
//!     .account("Assets:Bank:Checking")
//!     .date_column("Date")
//!     .narration_column("Description")
//!     .amount_column("Amount")
//!     .build()
//!     .unwrap();
//!
//! // Extract transactions from a file
//! // let directives = extract_from_file(Path::new("bank.csv"), &config)?;
//! ```
29
30#![forbid(unsafe_code)]
31#![warn(missing_docs)]
32
33pub mod config;
34pub mod csv_importer;
35pub mod csv_inference;
36pub mod ofx_importer;
37pub mod registry;
38
39use anyhow::Result;
40use rustledger_core::Directive;
41use rustledger_ops::enrichment::Enrichment;
42use std::path::Path;
43
44pub use config::ImporterConfig;
45pub use ofx_importer::OfxImporter;
46pub use registry::ImporterRegistry;
47
48use rustledger_ops::fingerprint::Fingerprint;
49
50/// Compute an import fingerprint from a directive.
51///
52/// For transactions, uses the first posting's amount and the payee+narration
53/// text. Returns `None` for non-transaction directives.
54pub(crate) fn directive_fingerprint(directive: &Directive) -> Option<Fingerprint> {
55    let Directive::Transaction(txn) = directive else {
56        return None;
57    };
58    let amount_str = txn.postings.first().and_then(|p| {
59        p.units
60            .as_ref()
61            .and_then(|u| u.number().map(|n| n.to_string()))
62    });
63    let mut text = String::new();
64    if let Some(ref payee) = txn.payee {
65        text.push_str(payee.as_str());
66        text.push(' ');
67    }
68    text.push_str(txn.narration.as_str());
69    Some(Fingerprint::compute(
70        &txn.date.to_string(),
71        amount_str.as_deref(),
72        &text,
73    ))
74}
75
76/// Result of an import operation.
77#[derive(Debug, Clone)]
78pub struct ImportResult {
79    /// The extracted directives.
80    pub directives: Vec<Directive>,
81    /// Warnings encountered during import.
82    pub warnings: Vec<String>,
83}
84
85impl ImportResult {
86    /// Create a new import result.
87    pub const fn new(directives: Vec<Directive>) -> Self {
88        Self {
89            directives,
90            warnings: Vec::new(),
91        }
92    }
93
94    /// Create an empty import result.
95    pub const fn empty() -> Self {
96        Self {
97            directives: Vec::new(),
98            warnings: Vec::new(),
99        }
100    }
101
102    /// Add a warning to the result.
103    pub fn with_warning(mut self, warning: impl Into<String>) -> Self {
104        self.warnings.push(warning.into());
105        self
106    }
107}
108
109/// Result of an enriched import operation.
110///
111/// Each directive is paired with an [`Enrichment`] that carries metadata about
112/// how it was categorized, its confidence score, and a stable fingerprint for
113/// deduplication.
114#[derive(Debug, Clone)]
115pub struct EnrichedImportResult {
116    /// Directive–enrichment pairs.
117    pub entries: Vec<(Directive, Enrichment)>,
118    /// Warnings encountered during import.
119    pub warnings: Vec<String>,
120}
121
122impl EnrichedImportResult {
123    /// Create a new enriched import result.
124    pub const fn new(entries: Vec<(Directive, Enrichment)>) -> Self {
125        Self {
126            entries,
127            warnings: Vec::new(),
128        }
129    }
130
131    /// Create an empty enriched import result.
132    pub const fn empty() -> Self {
133        Self {
134            entries: Vec::new(),
135            warnings: Vec::new(),
136        }
137    }
138
139    /// Add a warning.
140    pub fn with_warning(mut self, warning: impl Into<String>) -> Self {
141        self.warnings.push(warning.into());
142        self
143    }
144
145    /// Convert to a plain [`ImportResult`], discarding enrichment metadata.
146    #[must_use]
147    pub fn into_import_result(self) -> ImportResult {
148        ImportResult {
149            directives: self.entries.into_iter().map(|(d, _)| d).collect(),
150            warnings: self.warnings,
151        }
152    }
153}
154
155impl From<EnrichedImportResult> for ImportResult {
156    fn from(enriched: EnrichedImportResult) -> Self {
157        enriched.into_import_result()
158    }
159}
160
161/// Trait for file importers.
162///
163/// Implementors of this trait can extract beancount directives from various
164/// file formats (CSV, OFX, QFX, etc.).
165pub trait Importer: Send + Sync {
166    /// Returns the name of this importer.
167    fn name(&self) -> &str;
168
169    /// Check if this importer can handle the given file.
170    ///
171    /// This method should be fast - it typically checks file extension,
172    /// header patterns, or other quick heuristics.
173    fn identify(&self, path: &Path) -> bool;
174
175    /// Extract directives from the given file.
176    fn extract(&self, path: &Path) -> Result<ImportResult>;
177
178    /// Returns a description of what this importer handles.
179    fn description(&self) -> &str {
180        self.name()
181    }
182}
183
184/// Extract transactions from a file using the given configuration.
185pub fn extract_from_file(path: &Path, config: &ImporterConfig) -> Result<ImportResult> {
186    config.extract(path)
187}
188
189/// Extract transactions from file contents (useful for testing).
190pub fn extract_from_string(content: &str, config: &ImporterConfig) -> Result<ImportResult> {
191    config.extract_from_string(content)
192}
193
194/// Auto-extract transactions from a file by inferring its format.
195///
196/// If the file is OFX/QFX, uses the OFX importer directly. Otherwise,
197/// attempts to infer the CSV format from the file content. Returns the
198/// enriched result with fingerprints and confidence scores.
199///
200/// # Errors
201///
202/// Returns an error if the file can't be read, the format can't be inferred,
203/// or extraction fails.
204pub fn auto_extract(
205    path: &std::path::Path,
206    account: &str,
207    currency: &str,
208) -> Result<EnrichedImportResult> {
209    // Check for OFX first
210    if path
211        .extension()
212        .is_some_and(|ext| ext.eq_ignore_ascii_case("ofx") || ext.eq_ignore_ascii_case("qfx"))
213    {
214        let ofx = ofx_importer::OfxImporter::new(account, currency);
215        return ofx.extract_from_string_enriched(&std::fs::read_to_string(path)?);
216    }
217
218    // Try CSV auto-inference
219    let content = std::fs::read_to_string(path)
220        .map_err(|e| anyhow::anyhow!("Failed to read file {}: {e}", path.display()))?;
221
222    let inferred = csv_inference::infer_csv_config(&content)
223        .ok_or_else(|| anyhow::anyhow!("Could not infer CSV format from {}", path.display()))?;
224
225    let csv_config = inferred.to_csv_config();
226    let importer_config = config::ImporterConfig {
227        account: account.to_string(),
228        currency: Some(currency.to_string()),
229        amount_format: config::AmountFormat::default(),
230        importer_type: config::ImporterType::Csv(csv_config.clone()),
231    };
232    let importer = csv_importer::CsvImporter::new(importer_config);
233    importer.extract_string_enriched(&content, &csv_config)
234}
235
/// Unit tests for the result types, the string-based extraction entry point,
/// and `directive_fingerprint`.
#[cfg(test)]
mod tests {
    use super::*;
    use rust_decimal::Decimal;
    use rustledger_core::{Amount, Posting, Transaction};
    use std::str::FromStr;

    // ========== ImportResult Tests ==========

    #[test]
    fn test_import_result_new() {
        let directives = vec![];
        let result = ImportResult::new(directives);
        assert!(result.directives.is_empty());
        assert!(result.warnings.is_empty());
    }

    #[test]
    fn test_import_result_empty() {
        let result = ImportResult::empty();
        assert!(result.directives.is_empty());
        assert!(result.warnings.is_empty());
    }

    #[test]
    fn test_import_result_with_warning() {
        let result = ImportResult::empty().with_warning("Test warning");
        assert_eq!(result.warnings.len(), 1);
        assert_eq!(result.warnings[0], "Test warning");
    }

    #[test]
    fn test_import_result_multiple_warnings() {
        let result = ImportResult::empty()
            .with_warning("Warning 1")
            .with_warning("Warning 2");
        assert_eq!(result.warnings.len(), 2);
        assert_eq!(result.warnings[0], "Warning 1");
        assert_eq!(result.warnings[1], "Warning 2");
    }

    #[test]
    fn test_import_result_with_directives() {
        let date = rustledger_core::naive_date(2024, 1, 15).unwrap();
        let txn = Transaction::new(date, "Test transaction")
            .with_posting(Posting::new(
                "Assets:Bank",
                Amount::new(Decimal::from_str("100").unwrap(), "USD"),
            ))
            .with_posting(Posting::new(
                "Expenses:Food",
                Amount::new(Decimal::from_str("-100").unwrap(), "USD"),
            ));
        let directives = vec![Directive::Transaction(txn)];
        let result = ImportResult::new(directives);
        assert_eq!(result.directives.len(), 1);
    }

    // ========== extract_from_string Tests ==========

    #[test]
    fn test_extract_from_string_csv() {
        let config = ImporterConfig::csv()
            .account("Assets:Bank:Checking")
            .currency("USD")
            .date_column("Date")
            .narration_column("Description")
            .amount_column("Amount")
            .build()
            .unwrap();

        let csv_content = "Date,Description,Amount\n2024-01-15,Coffee,-5.00\n";
        let result = extract_from_string(csv_content, &config).unwrap();
        assert_eq!(result.directives.len(), 1);
    }

    #[test]
    fn test_extract_from_string_empty_csv() {
        let config = ImporterConfig::csv()
            .account("Assets:Bank:Checking")
            .currency("USD")
            .date_column("Date")
            .narration_column("Description")
            .amount_column("Amount")
            .build()
            .unwrap();

        // Header only: there are no data rows to turn into directives.
        let csv_content = "Date,Description,Amount\n";
        let result = extract_from_string(csv_content, &config).unwrap();
        assert!(result.directives.is_empty());
    }

    #[test]
    fn test_import_result_debug() {
        let result = ImportResult::empty();
        let debug_str = format!("{result:?}");
        assert!(debug_str.contains("ImportResult"));
    }

    #[test]
    fn test_import_result_clone() {
        let result = ImportResult::empty().with_warning("Test");
        let cloned = result.clone();
        // Verify both original and clone have the warning
        assert_eq!(result.warnings.len(), 1);
        assert_eq!(cloned.warnings.len(), 1);
    }

    // ========== EnrichedImportResult Tests ==========

    /// Build a rule-based enrichment with no alternatives and no fingerprint.
    fn make_test_enrichment(index: usize, confidence: f64) -> Enrichment {
        Enrichment {
            directive_index: index,
            confidence,
            method: rustledger_ops::enrichment::CategorizationMethod::Rule,
            alternatives: vec![],
            fingerprint: None,
        }
    }

    /// Build a two-posting transaction directive dated 2024-01-15.
    fn make_test_txn_directive() -> Directive {
        let date = rustledger_core::naive_date(2024, 1, 15).unwrap();
        let txn = Transaction::new(date, "Test")
            .with_posting(Posting::new(
                "Assets:Bank",
                Amount::new(Decimal::from_str("-50").unwrap(), "USD"),
            ))
            .with_posting(Posting::new(
                "Expenses:Food",
                Amount::new(Decimal::from_str("50").unwrap(), "USD"),
            ));
        Directive::Transaction(txn)
    }

    #[test]
    fn test_enriched_import_result_new() {
        let directive = make_test_txn_directive();
        let enrichment = make_test_enrichment(0, 0.95);
        let entries = vec![(directive, enrichment)];
        let result = EnrichedImportResult::new(entries);
        assert_eq!(result.entries.len(), 1);
        assert!(result.warnings.is_empty());
    }

    #[test]
    fn test_enriched_import_result_empty() {
        let result = EnrichedImportResult::empty();
        assert!(result.entries.is_empty());
        assert!(result.warnings.is_empty());
    }

    #[test]
    fn test_enriched_import_result_with_warning() {
        let result = EnrichedImportResult::empty().with_warning("Test warning");
        assert_eq!(result.warnings.len(), 1);
        assert_eq!(result.warnings[0], "Test warning");
    }

    #[test]
    fn test_enriched_import_result_multiple_warnings() {
        let result = EnrichedImportResult::empty()
            .with_warning("Warning 1")
            .with_warning("Warning 2");
        assert_eq!(result.warnings.len(), 2);
        // Warnings are kept in insertion order.
        assert_eq!(result.warnings[0], "Warning 1");
        assert_eq!(result.warnings[1], "Warning 2");
    }

    #[test]
    fn test_enriched_into_import_result() {
        let d1 = make_test_txn_directive();
        let d2 = make_test_txn_directive();
        let entries = vec![
            (d1, make_test_enrichment(0, 0.95)),
            (d2, make_test_enrichment(1, 0.3)),
        ];
        let enriched = EnrichedImportResult::new(entries).with_warning("A warning");

        let plain = enriched.into_import_result();
        // Directives preserved, enrichment dropped
        assert_eq!(plain.directives.len(), 2);
        // Warnings preserved
        assert_eq!(plain.warnings.len(), 1);
        assert_eq!(plain.warnings[0], "A warning");
    }

    #[test]
    fn test_enriched_from_into_import_result() {
        let entries = vec![(make_test_txn_directive(), make_test_enrichment(0, 1.0))];
        let enriched = EnrichedImportResult::new(entries);

        // Use the From<EnrichedImportResult> for ImportResult trait
        let plain: ImportResult = enriched.into();
        assert_eq!(plain.directives.len(), 1);
        assert!(plain.warnings.is_empty());
    }

    #[test]
    fn test_enriched_import_result_debug_and_clone() {
        let result = EnrichedImportResult::empty().with_warning("Test");
        let debug_str = format!("{result:?}");
        assert!(debug_str.contains("EnrichedImportResult"));
        // Clone for real (a plain move would not exercise `Clone`) and verify
        // that both the original and the copy still carry the warning.
        let cloned = result.clone();
        assert_eq!(result.warnings.len(), 1);
        assert_eq!(cloned.warnings.len(), 1);
    }

    // ========== directive_fingerprint Tests ==========

    #[test]
    fn test_directive_fingerprint_for_transaction() {
        let directive = make_test_txn_directive();
        let fp = directive_fingerprint(&directive);
        assert!(fp.is_some());
    }

    #[test]
    fn test_directive_fingerprint_none_for_non_transaction() {
        // Use a Balance directive
        let date = rustledger_core::naive_date(2024, 1, 15).unwrap();
        let balance = rustledger_core::Balance::new(
            date,
            "Assets:Bank",
            Amount::new(Decimal::from_str("1000").unwrap(), "USD"),
        );
        let directive = Directive::Balance(balance);
        let fp = directive_fingerprint(&directive);
        assert!(fp.is_none());
    }
}