Skip to main content

mxr_reader/
pipeline.rs

1use crate::{boilerplate, html, quotes, signatures, tracking};
2
/// Switches that select which stages of the reader pipeline run.
#[derive(Clone, Debug)]
pub struct ReaderConfig {
    /// Optional external command line used to turn HTML into text
    /// (for example `"w3m -T text/html -dump"`).
    /// When `None`, the built-in html2text conversion is used instead.
    pub html_command: Option<String>,
    /// Remove the signature from the message body.
    pub strip_signatures: bool,
    /// Collapse quoted replies out of the message body.
    pub collapse_quotes: bool,
    /// Remove boilerplate text such as disclaimers.
    pub strip_boilerplate: bool,
    /// Remove tracking and footer junk.
    pub strip_tracking: bool,
}
18
19impl Default for ReaderConfig {
20    fn default() -> Self {
21        Self {
22            html_command: None,
23            strip_signatures: true,
24            collapse_quotes: true,
25            strip_boilerplate: true,
26            strip_tracking: true,
27        }
28    }
29}
30
31/// Output from the reader pipeline.
32#[derive(Debug, Clone)]
33pub struct ReaderOutput {
34    /// Cleaned content: just the human-written text.
35    pub content: String,
36    /// Quoted messages that were stripped (available for expansion).
37    pub quoted_messages: Vec<quotes::QuotedBlock>,
38    /// The signature that was stripped.
39    pub signature: Option<String>,
40    /// Line count stats for UI display.
41    pub original_lines: usize,
42    pub cleaned_lines: usize,
43}
44
45/// Run the full reader pipeline on a message body.
46///
47/// Accepts either plain text or HTML. If HTML, converts to plain text first.
48pub fn clean(text: Option<&str>, html: Option<&str>, config: &ReaderConfig) -> ReaderOutput {
49    // 1. Resolve to plain text
50    let raw = match (text, html) {
51        (Some(t), _) => t.to_string(),
52        (None, Some(h)) => html::to_plain_text(h, config),
53        (None, None) => String::new(),
54    };
55
56    let original_lines = raw.lines().count();
57    let mut content = raw;
58    let mut quoted_messages = Vec::new();
59    let mut signature = None;
60
61    // 2. Extract and collapse quoted replies
62    if config.collapse_quotes {
63        let (cleaned, q) = quotes::collapse(&content);
64        content = cleaned;
65        quoted_messages = q;
66    }
67
68    // 3. Strip signatures
69    if config.strip_signatures {
70        let (cleaned, sig) = signatures::strip(&content);
71        content = cleaned;
72        signature = sig;
73    }
74
75    // 4. Strip boilerplate
76    if config.strip_boilerplate {
77        content = boilerplate::strip(&content);
78    }
79
80    // 5. Strip tracking junk
81    if config.strip_tracking {
82        content = tracking::strip(&content);
83    }
84
85    // 6. Clean up excessive whitespace
86    content = normalize_whitespace(&content);
87
88    let cleaned_lines = content.lines().count();
89
90    ReaderOutput {
91        content,
92        quoted_messages,
93        signature,
94        original_lines,
95        cleaned_lines,
96    }
97}
98
/// Tidies vertical whitespace in `text`.
///
/// Lines that are empty or whitespace-only count as blank and are emitted as
/// bare newlines; at most two of them are kept per consecutive run. Non-blank
/// lines are copied through unchanged. Finally, leading and trailing
/// whitespace of the whole result is trimmed.
fn normalize_whitespace(text: &str) -> String {
    // Longest run of consecutive blank lines that survives.
    const MAX_BLANK_RUN: usize = 2;

    let mut out = String::with_capacity(text.len());
    let mut run = 0usize;

    for line in text.lines() {
        let is_blank = line.trim().is_empty();
        run = if is_blank { run + 1 } else { 0 };

        // Blank lines beyond the allowed run are dropped entirely.
        if run > MAX_BLANK_RUN {
            continue;
        }

        // Blank lines contribute only the newline, discarding any stray spaces.
        if !is_blank {
            out.push_str(line);
        }
        out.push('\n');
    }

    out.trim().to_string()
}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// A `-- ` delimited signature is removed from the body and reported
    /// separately in `signature`.
    #[test]
    fn plain_email_with_signature() {
        let text =
            "Hey,\n\nCan we meet tomorrow at 3pm?\n\nThanks,\n-- \nAlice\nSenior Engineer\n+1 555-0123\nalice@company.com";
        let output = clean(Some(text), None, &ReaderConfig::default());
        assert_eq!(
            output.content.trim(),
            "Hey,\n\nCan we meet tomorrow at 3pm?\n\nThanks,"
        );
        assert!(output.signature.is_some());
    }

    /// Stripping a quote and a signature must reduce the reported line count.
    #[test]
    fn reader_mode_stats_correct() {
        let text = "Content here.\n\nOn Mon, alice wrote:\n> Long quote\n> Another line\n> And more\n\n-- \nSig line\nPhone: 555-0123";
        let output = clean(Some(text), None, &ReaderConfig::default());
        assert!(output.original_lines > output.cleaned_lines);
    }

    /// With neither text nor HTML, the output is empty and both counters are zero.
    #[test]
    fn empty_input_returns_empty() {
        let output = clean(None, None, &ReaderConfig::default());
        assert!(output.content.is_empty());
        assert_eq!(output.original_lines, 0);
        assert_eq!(output.cleaned_lines, 0);
    }

    /// When only HTML is supplied, it is converted and used as the body.
    #[test]
    fn html_preferred_over_none() {
        let html = "<p>Hello world</p>";
        let output = clean(None, Some(html), &ReaderConfig::default());
        assert!(output.content.contains("Hello world"));
    }

    /// When both bodies are supplied, the plain text is used and the HTML is ignored.
    #[test]
    fn text_preferred_over_html() {
        let text = "Plain text version";
        let html = "<p>HTML version</p>";
        let output = clean(Some(text), Some(html), &ReaderConfig::default());
        assert!(output.content.contains("Plain text"));
        // Checking only for the plain text would also pass if both bodies were
        // merged; make sure the HTML alternative really was left out.
        assert!(!output.content.contains("HTML version"));
    }

    /// Disabling signature stripping leaves the signature in the body and
    /// reports none.
    #[test]
    fn config_disables_stripping() {
        let text = "Content.\n-- \nMy Signature";
        let config = ReaderConfig {
            strip_signatures: false,
            ..Default::default()
        };
        let output = clean(Some(text), None, &config);
        assert!(output.content.contains("My Signature"));
        assert!(output.signature.is_none());
    }
}