tex2csv/
file_parser.rs

1use std::fs::File;
2use std::io::{BufRead, BufReader, Write};
3
4/// This function shall parse a LaTex Table into a csv file.
5/// This simplest case of such a table might look like this:
6/// ```latex
7/// \begin{tabular}{|c|c|c|}
8///   \hline
9///   Name & Age & City \\
10///   \hline
11///   John & 25 & New York \\
12///   Jane & 30 & San Francisco \\
13///   \hline
14/// \end{tabular}
15/// ```
16/// The `tabular` environment can be something else, e.g. `longtable`.
17///
18/// Arguments
19/// =========
20///     * `file_handle` - A handle to a `BufReader`, i.e. the .tex file.
21///     * `environment` - A string slice specifing the name of the environment wrapping the table
22pub fn parse_file(
23    tex_file_handle: BufReader<File>,
24    mut csv_file: File,
25    environment: &str,
26) -> Result<(), Box<dyn std::error::Error>> {
27    let open_env: String = String::from(r"\begin{") + environment;
28    let close_env: String = String::from(r"\end{") + environment;
29    // Parse exclusion file
30    let exclude_lines = match File::open("exclude_lines.txt") {
31        Ok(f) => f,
32        Err(e) => {
33            println!(
34                r"You need to supply a file 'exclude_lines.txt' containing keyword identifying lines to be skipped.
35        For example: 
36            \midrule
37            \toprule
38            \endheader
39            \begin
40            \end
41            {}
42            ", e
43            );
44            panic!("No 'exclude_lines.txt' given.")
45        }
46    };
47    let exclude_handle = BufReader::new(exclude_lines);
48    let exclude_iterator = parse_exclude_list(exclude_handle).unwrap();
49    // Iterate over each line in the LaTeX file
50    for line in tex_file_handle.lines() {
51        // Unwrap Result returned in the lines iterator.
52        let line = line?;
53        // Check if we have a separator, open/closing of environment or empty line
54        if exclude_iterator.contains(&line.trim().to_owned())
55            || line.trim().is_empty()
56            || line.trim().contains(&open_env)
57            || line.trim().contains(&close_env)
58        {
59            continue;
60        } else {
61            // Split the line into columns using "&" as a delimiter and remove "\\"
62            let columns: Vec<&str> = line.split('&').map(|s| s.trim()).collect();
63            let columns: Vec<String> = columns.iter().map(|&s| s.replace(r"\\", "")).collect();
64            // Write the CSV header to the CSV file
65            writeln!(csv_file, "{}", columns.join(","))?;
66        }
67    }
68    Ok(())
69}
70
/// Reads the list of keywords that mark lines to be skipped when parsing the tex file.
///
/// Every line of `exclude_file` becomes one entry of the returned list, in
/// file order. The parsed entries are also printed to stdout.
///
/// # Errors
///
/// Returns an error if a line cannot be read, e.g. because of an I/O failure
/// or because the file is not valid UTF-8.
pub fn parse_exclude_list(
    exclude_file: BufReader<File>,
) -> Result<Vec<String>, Box<dyn std::error::Error>> {
    // Collecting into a `Result` stops at the first unreadable line and lets
    // `?` hand the error to the caller instead of panicking.
    let exclude_list = exclude_file
        .lines()
        .collect::<Result<Vec<String>, _>>()?;
    println!("Parsed exclusion list:");
    for keyword in &exclude_list {
        println!("{}", keyword);
    }
    Ok(exclude_list)
}