Skip to main content

freeswitch_sofia_trace_parser/
grep.rs

1use std::io::{BufRead, BufReader, Read};
2
3/// A [`Read`] adapter that strips `grep -C` separator lines (`--\n`) from the
4/// input stream.
5///
6/// When piping grep output into the parser (`grep -C5 pattern dump | parser`),
7/// grep inserts `--` lines between match groups. These corrupt the binary frame
8/// stream. Wrap the input in `GrepFilter` to transparently remove them.
9///
10/// Only exact `--\n` and `--\r\n` lines are stripped. Similar lines like
11/// `---\n` or `-- \n` pass through unchanged.
12///
13/// # When to skip this filter
14///
15/// `GrepFilter` adds a small per-byte overhead (~3% CPU on large files) for
16/// newline scanning even when no separators are present. If your application
17/// opens dump files directly (not piped through grep), pass the reader
18/// straight to [`FrameIterator`](crate::FrameIterator) /
19/// [`MessageIterator`](crate::MessageIterator) /
20/// [`ParsedMessageIterator`](crate::ParsedMessageIterator) without wrapping:
21///
22/// ```no_run
23/// use std::fs::File;
24/// use freeswitch_sofia_trace_parser::ParsedMessageIterator;
25///
26/// // Direct file — no GrepFilter overhead
27/// let file = File::open("profile.dump").unwrap();
28/// for msg in ParsedMessageIterator::new(file) {
29///     // ...
30/// }
31/// ```
32pub struct GrepFilter<R> {
33    inner: BufReader<R>,
34    /// Accumulates a partial line when `--\n` / `--\r\n` detection straddles
35    /// a BufReader buffer boundary.
36    partial: Vec<u8>,
37    partial_pos: usize,
38}
39
40impl<R: Read> GrepFilter<R> {
41    /// Create a new filter wrapping the given reader.
42    pub fn new(reader: R) -> Self {
43        Self {
44            inner: BufReader::new(reader),
45            partial: Vec::new(),
46            partial_pos: 0,
47        }
48    }
49}
50
51fn is_grep_separator(line: &[u8]) -> bool {
52    line == b"--\n" || line == b"--\r\n"
53}
54
55impl<R: Read> Read for GrepFilter<R> {
56    fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
57        // Drain leftover partial line from a previous call
58        if self.partial_pos < self.partial.len() {
59            let available = &self.partial[self.partial_pos..];
60            let n = out.len().min(available.len());
61            out[..n].copy_from_slice(&available[..n]);
62            self.partial_pos += n;
63            if self.partial_pos == self.partial.len() {
64                self.partial.clear();
65                self.partial_pos = 0;
66            }
67            return Ok(n);
68        }
69
70        let mut filled = 0;
71
72        loop {
73            let buf = self.inner.fill_buf()?;
74            if buf.is_empty() {
75                return Ok(filled);
76            }
77
78            let remaining = &mut out[filled..];
79            if remaining.is_empty() {
80                return Ok(filled);
81            }
82
83            match memchr::memchr(b'\n', buf) {
84                Some(nl) => {
85                    let line_len = nl + 1;
86                    if is_grep_separator(&buf[..line_len]) {
87                        self.inner.consume(line_len);
88                        continue;
89                    }
90                    let n = remaining.len().min(line_len);
91                    remaining[..n].copy_from_slice(&buf[..n]);
92                    if n < line_len {
93                        self.partial.extend_from_slice(&buf[n..line_len]);
94                        self.partial_pos = 0;
95                    }
96                    self.inner.consume(line_len);
97                    filled += n;
98                }
99                None => {
100                    // No newline — partial line, can't be a separator.
101                    let buf_len = buf.len();
102                    let n = remaining.len().min(buf_len);
103                    remaining[..n].copy_from_slice(&buf[..n]);
104                    if n < buf_len {
105                        self.partial.extend_from_slice(&buf[n..buf_len]);
106                        self.partial_pos = 0;
107                    }
108                    self.inner.consume(buf_len);
109                    filled += n;
110                }
111            }
112        }
113    }
114}
115
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Read;

    /// Run `input` through a `GrepFilter` and collect everything it yields.
    fn filter(input: &[u8]) -> Vec<u8> {
        let mut out = Vec::new();
        GrepFilter::new(input).read_to_end(&mut out).unwrap();
        out
    }

    #[test]
    fn strip_separator() {
        assert_eq!(filter(b"hello\n--\nworld\n"), b"hello\nworld\n");
    }

    #[test]
    fn strip_crlf_separator() {
        assert_eq!(filter(b"hello\n--\r\nworld\n"), b"hello\nworld\n");
    }

    #[test]
    fn passthrough_no_separators() {
        let input = b"line one\nline two\nline three\n";
        assert_eq!(filter(input), input);
    }

    #[test]
    fn consecutive_separators() {
        assert_eq!(filter(b"a\n--\n--\n--\nb\n"), b"a\nb\n");
    }

    #[test]
    fn separator_at_start() {
        assert_eq!(filter(b"--\nhello\n"), b"hello\n");
    }

    #[test]
    fn partial_separator_preserved() {
        let input = b"---\n-- \n--x\n";
        assert_eq!(filter(input), input);
    }

    #[test]
    fn empty_input() {
        assert_eq!(filter(b""), b"");
    }

    #[test]
    fn only_separators() {
        assert_eq!(filter(b"--\n--\n--\n"), b"");
    }

    #[test]
    fn no_trailing_newline() {
        assert_eq!(filter(b"hello"), b"hello");
    }

    #[test]
    fn binary_content_with_separator_like_bytes() {
        let input = b"data\x00--\nmore\n";
        assert_eq!(filter(input), input);
    }

    /// Reading through a one-byte output buffer must give the same result as
    /// reading everything at once.
    #[test]
    fn one_byte_reads() {
        let mut reader = GrepFilter::new(&b"hello\n--\nworld\n"[..]);
        let mut collected = Vec::new();
        let mut byte = [0u8; 1];
        loop {
            match reader.read(&mut byte).unwrap() {
                0 => break,
                n => collected.extend_from_slice(&byte[..n]),
            }
        }
        assert_eq!(collected, b"hello\nworld\n");
    }

    #[test]
    fn frame_iterator_grep_separator_between_frames() {
        use crate::FrameIterator;

        let mut data = Vec::new();
        data.extend_from_slice(
            b"recv 5 bytes from tcp/1.1.1.1:5060 at 00:00:00.000000:\nhello\x0B\n",
        );
        // The separator grep prints between two match groups; without it this
        // test would pass even if the filter did nothing.
        data.extend_from_slice(b"--\n");
        data.extend_from_slice(
            b"sent 5 bytes to tcp/1.1.1.1:5060 at 00:00:00.000001:\nworld\x0B\n",
        );

        let filtered = GrepFilter::new(&data[..]);
        let frames: Vec<_> = FrameIterator::new(filtered)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        assert_eq!(frames.len(), 2);
        assert_eq!(frames[0].content, b"hello");
        assert_eq!(frames[1].content, b"world");
    }

    #[test]
    fn frame_iterator_grep_partial_context() {
        use crate::FrameIterator;

        let mut data = Vec::new();
        data.extend_from_slice(
            b"recv 5 bytes from tcp/1.1.1.1:5060 at 00:00:00.000000:\nhello\x0B\n",
        );
        // A new match group begins here, in the middle of some other frame:
        // grep emits its separator and then only the tail of that frame.
        data.extend_from_slice(b"--\n");
        data.extend_from_slice(b"Accept: application/sdp\r\nContent-Length: 0\r\n\r\n");
        data.extend_from_slice(b"\x0B\n");
        data.extend_from_slice(b"sent 3 bytes to tcp/2.2.2.2:5060 at 00:00:01.000000:\nbye\x0B\n");

        let filtered = GrepFilter::new(&data[..]);
        let items: Vec<_> = FrameIterator::new(filtered).collect();
        let frames: Vec<_> = items.into_iter().filter_map(Result::ok).collect();
        assert_eq!(frames.len(), 2);
        assert_eq!(frames[0].content, b"hello");
        assert_eq!(frames[1].content, b"bye");
    }

    #[test]
    fn frame_iterator_grep_separator_strips_from_content() {
        use crate::FrameIterator;

        let content = b"SIP/2.0 200 OK\r\nVia: a\r\nContent-Length: 0\r\n\r\n";
        let mut data = Vec::new();
        // The declared byte count is that of the clean content, so the frame
        // only parses if the separator inside it has been removed.
        let header = format!(
            "recv {} bytes from tcp/1.1.1.1:5060 at 00:00:00.000000:\n",
            content.len()
        );
        data.extend_from_slice(header.as_bytes());
        data.extend_from_slice(b"SIP/2.0 200 OK\r\nVia: a\r\n--\nContent-Length: 0\r\n\r\n");
        data.extend_from_slice(b"\x0B\n");

        let filtered = GrepFilter::new(&data[..]);
        let frames: Vec<_> = FrameIterator::new(filtered)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        assert_eq!(frames.len(), 1);
        assert_eq!(frames[0].content, content);
    }
}