bank_csv/
lib.rs

1//! Detect CSV files from a couple of German banks (N26, DKB) and PayPal,
2//! filter out transactions in a specific currency and generate a CSV file with these transactions
3use chrono::NaiveDate;
4use csv::StringRecord;
5use encoding_rs::ISO_8859_10;
6use polars::prelude::*;
7use std::cmp::Ordering;
8use std::fmt;
9use std::fmt::Display;
10use std::fs::File;
11use std::io::Read;
12use std::io::{self, BufRead, BufReader, BufWriter, Write};
13use std::path::Path;
14use tempfile::NamedTempFile;
15
16const CHAR_COMMA: &str = ",";
17const CHAR_DOT: &str = ".";
18const CHAR_DOUBLE_QUOTE: char = '"';
19/// The number of first columns to read from the CSV file; used to detect the source
20pub const NUM_FIRST_COLUMNS: usize = 5;
21/// The number of columns to select from the CSV file
22pub const NUM_SELECT_COLUMNS: usize = 6;
23const PAYPAL_COLUMNS: [&str; NUM_FIRST_COLUMNS] = ["Date", "Time", "TimeZone", "Name", "Type"];
24const PAYPAL_COLUMNS_OLD: [&str; NUM_FIRST_COLUMNS] =
25    ["Date", "Time", "Time Zone", "Description", "Currency"];
26const N26_COLUMNS: [&str; NUM_FIRST_COLUMNS] = [
27    "Date",
28    "Payee",
29    "Account number",
30    "Transaction type",
31    "Payment reference",
32];
33const DKB_COLUMNS: [&str; NUM_FIRST_COLUMNS] = [
34    "Buchungstag",
35    "Wertstellung",
36    "Buchungstext",
37    "Auftraggeber / Begünstigter",
38    "Verwendungszweck",
39];
40
41/// The source of a CSV file
42#[derive(PartialEq)]
43pub enum Source {
44    /// N26 CSV
45    N26,
46    /// PayPal has changed the CSV format at least once
47    PayPal,
48    /// DKB has a weird CSV with some lines on the top that don't match the rest of the file
49    DKB,
50}
51
52impl Display for Source {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        let str = match self {
55            Source::N26 => "N26".to_string(),
56            Source::PayPal => "PayPal".to_string(),
57            Source::DKB => "DKB".to_string(),
58        };
59        write!(f, "{}", str)
60    }
61}
62
63/// Detect the separator of a CSV file
64///
65/// # Arguments
66///
67/// * `file_path`: Path to the CSV file
68///
69/// returns: Result<(u8, Option<Source>), Error>
70pub fn detect_separator(file_path: &Path) -> io::Result<(u8, Option<Source>)> {
71    let file = File::open(file_path)?;
72    let reader = io::BufReader::new(file);
73    if let Some(first_line) = reader.lines().next() {
74        let line = first_line?;
75
76        // DKB has a weird CSV with some lines on the top that don't match the rest of the file
77        let source = if line.contains("Kontonummer:") {
78            Some(Source::DKB)
79        } else {
80            None
81        };
82
83        if line.contains(';') {
84            Ok((b';', source))
85        } else if line.contains(',') {
86            Ok((b',', source))
87        } else if line.contains('\t') {
88            Ok((b'\t', source))
89        } else {
90            Err(io::Error::new(
91                io::ErrorKind::NotFound,
92                format!(
93                    "{}: No separator found in the first line",
94                    file_path.display()
95                ),
96            ))
97        }
98    } else {
99        Err(io::Error::new(
100            io::ErrorKind::InvalidData,
101            format!("{}: Error reading the first line", file_path.display()),
102        ))
103    }
104}
105
106/// Remove the first extra lines from a DKB CSV file
107///
108/// # Arguments
109///
110/// * `original_dkb_csv_file`: path to the original DKB CSV file
111/// * `temp_file`:  a temporary file to write the filtered CSV to
112///
113/// returns: Result<(), Error>
114pub fn dkb_edit_file(
115    original_dkb_csv_file: &Path,
116    mut temp_file: &NamedTempFile,
117) -> io::Result<()> {
118    let input_file = File::open(original_dkb_csv_file)?;
119    let input_reader = BufReader::new(input_file);
120    let mut temp_writer = BufWriter::new(&mut temp_file);
121
122    let mut buffer = Vec::new();
123    input_reader.take(u64::MAX).read_to_end(&mut buffer)?;
124    let (decoded, _, _) = ISO_8859_10.decode(&buffer);
125    let mut write_lines = false;
126    for line_content in decoded.lines() {
127        if line_content.contains("Buchungstag") {
128            write_lines = true;
129        }
130        if write_lines {
131            writeln!(temp_writer, "{}", line_content)?;
132        }
133    }
134
135    // Flush the writer to make sure everything is written to the temporary file
136    temp_writer.flush()?;
137
138    Ok(())
139}
140
141/// Filter the data frame by currency and determine the source based on the first columns of the CSV
142///
143/// # Arguments
144///
145/// * `df`: the data frame to filter
146/// * `upper_currency`: the currency to filter by, in uppercase (EUR, USD, ...)
147///
148/// returns: (Source, DataFrame)
149pub fn filter_data_frame(df: &DataFrame, upper_currency: String) -> (Source, DataFrame) {
150    let schema = df.schema();
151    let first_columns: Vec<&str> = schema
152        .iter_names()
153        .take(NUM_FIRST_COLUMNS)
154        .map(|field| field.as_str())
155        .collect();
156
157    let columns_to_select: [&str; NUM_SELECT_COLUMNS];
158    let source: Source;
159    let lazy_frame: LazyFrame;
160    let cloned_df = df.clone();
161
162    // TODO: move these configs to separate structs or enums instead of "if" statements
163    if first_columns == PAYPAL_COLUMNS {
164        source = Source::PayPal;
165        columns_to_select = [
166            "Date",
167            "Currency",
168            "Gross",
169            "Type",
170            "Name",
171            "Transaction ID",
172        ];
173        lazy_frame = cloned_df
174            .lazy()
175            .filter(col("Currency").eq(lit(upper_currency.as_str())))
176            .filter(col("Balance Impact").eq(lit("Debit")))
177            .filter(col("Type").neq(lit("General Currency Conversion")));
178    } else if first_columns == PAYPAL_COLUMNS_OLD {
179        source = Source::PayPal;
180        columns_to_select = [
181            "Date",
182            "Currency",
183            "Gross",
184            "Description",
185            "Name",
186            "Transaction ID",
187        ];
188        lazy_frame = cloned_df
189            .lazy()
190            .filter(col("Currency").eq(lit(upper_currency.as_str())))
191            .filter(col("Description").neq(lit("General Currency Conversion")));
192    } else if first_columns == N26_COLUMNS {
193        source = Source::N26;
194        let amount_column = if upper_currency == "EUR" {
195            "Amount (EUR)"
196        } else {
197            "Amount (Foreign Currency)"
198        };
199        columns_to_select = [
200            "Date",
201            "Type Foreign Currency",
202            amount_column,
203            "Transaction type",
204            "Payee",
205            "Payment reference",
206        ];
207        lazy_frame = cloned_df
208            .lazy()
209            .filter(col("Type Foreign Currency").eq(lit(upper_currency.as_str())))
210    } else if first_columns == DKB_COLUMNS {
211        source = Source::DKB;
212        columns_to_select = [
213            "Buchungstag",
214            // Use any non-duplicated column here, otherwise polars will panic with:
215            // "column with name 'Verwendungszweck' has more than one occurrences".
216            // The memo (Verwendungszweck = "intended use") contains the foreign currency.
217            // We will filter and replace the value of this column later.
218            "Mandatsreferenz",
219            "Betrag (EUR)",
220            "Buchungstext",
221            "Auftraggeber / Begünstigter",
222            "Verwendungszweck",
223        ];
224        // Filtering will be done manually because DKB doesn't have a currency column
225        lazy_frame = cloned_df.lazy()
226    } else {
227        panic!(
228            "Unknown CSV format. These are the first columns: {:?}",
229            first_columns
230        );
231    }
232
233    (
234        source,
235        lazy_frame
236            .select([cols(columns_to_select)])
237            .collect()
238            .unwrap(),
239    )
240}
241
242/// Extract the amount from a DKB memo
243///
244/// # Arguments
245///
246/// * `currency`: 3-letter currency code
247/// * `memo`: The memo or description of the transaction
248///
249/// returns: `Option<String>`
250///
251/// # Examples
252///
253/// ```
254/// use bank_csv::dkb_extract_amount;
255/// assert_eq!(dkb_extract_amount("BRL", "2023-12-12      Debitk.44 Original 6,99 BRL 1 Euro=5,29545460 BRL VISA Debit"), Some("6,99".to_string()));
256/// assert_eq!(dkb_extract_amount("BRL", "Nothing here"), None);
257pub fn dkb_extract_amount(currency: &str, memo: &str) -> Option<String> {
258    let original_keyword = "Original ";
259    let start = memo.find(original_keyword)?;
260    let end = memo.find(currency)?;
261    if end <= start {
262        return None;
263    }
264
265    let amount_start = start + original_keyword.len();
266    let amount = &memo[amount_start..end].trim();
267
268    Some(amount.to_string())
269}
270
271/// A row in the CSV output
272#[derive(PartialEq, Eq)]
273pub struct CsvOutputRow {
274    /// The date of the transaction
275    pub date: NaiveDate,
276    /// The source of the transaction (PayPal, N26, DKB)
277    pub source: String,
278    /// The currency of the transaction, 3 letters (EUR, USD, ...)
279    pub currency: String,
280    /// The amount of the transaction
281    pub amount: String,
282    /// The type of the transaction, read from the original CSV
283    pub transaction_type: String,
284    /// The payee of the transaction
285    pub payee: String,
286    /// The memo or description of the transaction
287    pub memo: String,
288}
289
290impl PartialOrd for CsvOutputRow {
291    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
292        Some(self.cmp(other))
293    }
294}
295
296impl Ord for CsvOutputRow {
297    fn cmp(&self, other: &Self) -> Ordering {
298        match self.date.cmp(&other.date) {
299            Ordering::Equal => match self.currency.cmp(&other.currency) {
300                Ordering::Equal => match self.amount.cmp(&other.amount) {
301                    Ordering::Equal => match self.transaction_type.cmp(&other.transaction_type) {
302                        Ordering::Equal => self.payee.cmp(&other.payee),
303                        other => other,
304                    },
305                    other => other,
306                },
307                other => other,
308            },
309            other => other,
310        }
311    }
312}
313
314impl fmt::Display for CsvOutputRow {
315    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
316        write!(
317            f,
318            "{} [{}] {} {} paid to {} ({})",
319            self.date, self.source, self.currency, self.amount, self.payee, self.transaction_type,
320        )
321    }
322}
323
324fn strip_quotes(s: String) -> String {
325    s.strip_prefix(CHAR_DOUBLE_QUOTE)
326        .unwrap_or(s.as_str())
327        .strip_suffix(CHAR_DOUBLE_QUOTE)
328        .unwrap_or(s.as_str())
329        .to_string()
330}
331
332impl CsvOutputRow {
333    /// Create a new CsvOutputRow
334    pub fn new(
335        date: NaiveDate,
336        source: String,
337        currency: String,
338        amount: String,
339        transaction_type: String,
340        payee: String,
341        memo: String,
342    ) -> Self {
343        Self {
344            date,
345            source,
346            currency: strip_quotes(currency),
347            // "Numbers" on my macOS only understands commas as decimal separators;
348            // I can make it configurable if someone ever uses this crate
349            amount: strip_quotes(amount).replace(CHAR_DOT, CHAR_COMMA),
350            transaction_type: strip_quotes(transaction_type),
351            payee: strip_quotes(payee),
352            memo: strip_quotes(memo),
353        }
354    }
355
356    /// Create a CSV header
357    pub fn header() -> StringRecord {
358        let mut record = StringRecord::new();
359        record.push_field("Date");
360        record.push_field("Source");
361        record.push_field("Currency");
362        record.push_field("Amount");
363        record.push_field("Type");
364        record.push_field("Payee");
365        record.push_field("Memo");
366        record
367    }
368
369    /// Convert a CsvOutputRow to a CSV record
370    pub fn to_record(&self) -> StringRecord {
371        let mut record = StringRecord::new();
372        record.push_field(&self.date.format("%Y-%m-%d").to_string());
373        record.push_field(&self.source);
374        record.push_field(&self.currency);
375        record.push_field(&self.amount);
376        record.push_field(&self.transaction_type);
377        record.push_field(&self.payee);
378        record.push_field(&self.memo);
379        record
380    }
381}