Skip to main content

freeswitch_sofia_trace_parser/
grep.rs

1use std::io::{BufRead, BufReader, Read};
2
3/// A [`Read`] adapter that strips `grep -C` separator lines (`--\n`) from the
4/// input stream.
5///
6/// When piping grep output into the parser (`grep -C5 pattern dump | parser`),
7/// grep inserts `--` lines between match groups. These corrupt the binary frame
8/// stream. Wrap the input in `GrepFilter` to transparently remove them.
9///
10/// Only exact `--\n` and `--\r\n` lines are stripped. Similar lines like
11/// `---\n` or `-- \n` pass through unchanged.
12///
13/// # When to skip this filter
14///
15/// `GrepFilter` adds a small per-byte overhead (~3% CPU on large files) for
16/// newline scanning even when no separators are present. If your application
17/// opens dump files directly (not piped through grep), pass the reader
18/// straight to [`FrameIterator`](crate::FrameIterator) /
19/// [`MessageIterator`](crate::MessageIterator) /
20/// [`ParsedMessageIterator`](crate::ParsedMessageIterator) without wrapping:
21///
22/// ```no_run
23/// use std::fs::File;
24/// use freeswitch_sofia_trace_parser::ParsedMessageIterator;
25///
26/// // Direct file — no GrepFilter overhead
27/// let file = File::open("profile.dump").unwrap();
28/// for msg in ParsedMessageIterator::new(file) {
29///     // ...
30/// }
31/// ```
32pub struct GrepFilter<R> {
33    inner: BufReader<R>,
34    /// Accumulates a partial line when `--\n` / `--\r\n` detection straddles
35    /// a BufReader buffer boundary.
36    partial: Vec<u8>,
37    partial_pos: usize,
38}
39
40impl<R: Read> GrepFilter<R> {
41    /// Create a new filter wrapping the given reader.
42    pub fn new(reader: R) -> Self {
43        Self {
44            inner: BufReader::new(reader),
45            partial: Vec::new(),
46            partial_pos: 0,
47        }
48    }
49}
50
51fn is_grep_separator(line: &[u8]) -> bool {
52    line == b"--\n" || line == b"--\r\n"
53}
54
55impl<R: Read> Read for GrepFilter<R> {
56    fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
57        // Drain leftover partial line from a previous call
58        if self.partial_pos < self.partial.len() {
59            let available = &self.partial[self.partial_pos..];
60            let n = out.len().min(available.len());
61            out[..n].copy_from_slice(&available[..n]);
62            self.partial_pos += n;
63            if self.partial_pos == self.partial.len() {
64                self.partial.clear();
65                self.partial_pos = 0;
66            }
67            return Ok(n);
68        }
69
70        loop {
71            let buf = self.inner.fill_buf()?;
72            if buf.is_empty() {
73                return Ok(0);
74            }
75
76            match memchr::memchr(b'\n', buf) {
77                Some(nl) => {
78                    let line_len = nl + 1;
79                    if is_grep_separator(&buf[..line_len]) {
80                        self.inner.consume(line_len);
81                        continue;
82                    }
83                    let n = out.len().min(line_len);
84                    out[..n].copy_from_slice(&buf[..n]);
85                    if n < line_len {
86                        self.partial.extend_from_slice(&buf[n..line_len]);
87                        self.partial_pos = 0;
88                    }
89                    self.inner.consume(line_len);
90                    return Ok(n);
91                }
92                None => {
93                    // No newline — partial line, can't be a separator.
94                    let buf_len = buf.len();
95                    let n = out.len().min(buf_len);
96                    out[..n].copy_from_slice(&buf[..n]);
97                    if n < buf_len {
98                        self.partial.extend_from_slice(&buf[n..buf_len]);
99                        self.partial_pos = 0;
100                    }
101                    self.inner.consume(buf_len);
102                    return Ok(n);
103                }
104            }
105        }
106    }
107}
108
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Read;

    /// Run `input` through a `GrepFilter` and collect everything it yields.
    fn filter(input: &[u8]) -> Vec<u8> {
        let mut out = Vec::new();
        GrepFilter::new(input).read_to_end(&mut out).unwrap();
        out
    }

    // A lone `--\n` line between two data lines is removed.
    #[test]
    fn strip_separator() {
        assert_eq!(filter(b"hello\n--\nworld\n"), b"hello\nworld\n");
    }

    // The CRLF form of the separator is removed as well.
    #[test]
    fn strip_crlf_separator() {
        assert_eq!(filter(b"hello\n--\r\nworld\n"), b"hello\nworld\n");
    }

    // Input without separators comes out byte-for-byte identical.
    #[test]
    fn passthrough_no_separators() {
        let input = b"line one\nline two\nline three\n";
        assert_eq!(filter(input), input);
    }

    // Back-to-back separators are all removed.
    #[test]
    fn consecutive_separators() {
        assert_eq!(filter(b"a\n--\n--\n--\nb\n"), b"a\nb\n");
    }

    // A separator on the very first line is recognised.
    #[test]
    fn separator_at_start() {
        assert_eq!(filter(b"--\nhello\n"), b"hello\n");
    }

    // Lines that only resemble a separator are kept.
    #[test]
    fn partial_separator_preserved() {
        let input = b"---\n-- \n--x\n";
        assert_eq!(filter(input), input);
    }

    // Empty input yields empty output.
    #[test]
    fn empty_input() {
        assert_eq!(filter(b""), b"");
    }

    // Input consisting solely of separators yields nothing.
    #[test]
    fn only_separators() {
        assert_eq!(filter(b"--\n--\n--\n"), b"");
    }

    // A final line without a newline is passed through.
    #[test]
    fn no_trailing_newline() {
        assert_eq!(filter(b"hello"), b"hello");
    }

    // `--\n` that does not start a line is data, not a separator.
    #[test]
    fn binary_content_with_separator_like_bytes() {
        let input = b"data\x00--\nmore\n";
        assert_eq!(filter(input), input);
    }

    // Reading one byte at a time must neither lose data nor leak a separator.
    #[test]
    fn one_byte_output_buffer() {
        let mut reader = GrepFilter::new(&b"ab\n--\ncd\n"[..]);
        let mut out = Vec::new();
        let mut byte = [0u8; 1];
        while reader.read(&mut byte).unwrap() == 1 {
            out.push(byte[0]);
        }
        assert_eq!(out, b"ab\ncd\n");
    }

    // Two complete frames with a grep group separator between them.
    #[test]
    fn frame_iterator_grep_separator_between_frames() {
        use crate::FrameIterator;

        let mut data = Vec::new();
        data.extend_from_slice(
            b"recv 5 bytes from tcp/1.1.1.1:5060 at 00:00:00.000000:\nhello\x0B\n",
        );
        // The separator grep emits between match groups; the filter must
        // remove it before the frame parser sees the stream.
        data.extend_from_slice(b"--\n");
        data.extend_from_slice(
            b"sent 5 bytes to tcp/1.1.1.1:5060 at 00:00:00.000001:\nworld\x0B\n",
        );

        let filtered = GrepFilter::new(&data[..]);
        let frames: Vec<_> = FrameIterator::new(filtered)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        assert_eq!(frames.len(), 2);
        assert_eq!(frames[0].content, b"hello");
        assert_eq!(frames[1].content, b"world");
    }

    // A match group that starts mid-frame (no frame header) sits between two
    // complete frames; only the complete frames are expected as `Ok` items.
    #[test]
    fn frame_iterator_grep_partial_context() {
        use crate::FrameIterator;

        let mut data = Vec::new();
        data.extend_from_slice(
            b"recv 5 bytes from tcp/1.1.1.1:5060 at 00:00:00.000000:\nhello\x0B\n",
        );
        // Group separator preceding the truncated context lines.
        data.extend_from_slice(b"--\n");
        data.extend_from_slice(b"Accept: application/sdp\r\nContent-Length: 0\r\n\r\n");
        data.extend_from_slice(b"\x0B\n");
        data.extend_from_slice(
            b"sent 3 bytes to tcp/2.2.2.2:5060 at 00:00:01.000000:\nbye\x0B\n",
        );

        let filtered = GrepFilter::new(&data[..]);
        let items: Vec<_> = FrameIterator::new(filtered).collect();
        let frames: Vec<_> = items.into_iter().filter_map(Result::ok).collect();
        assert_eq!(frames.len(), 2);
        assert_eq!(frames[0].content, b"hello");
        assert_eq!(frames[1].content, b"bye");
    }

    // A separator line inside a frame body is removed, so the content matches
    // the byte count announced in the frame header.
    #[test]
    fn frame_iterator_grep_separator_strips_from_content() {
        use crate::FrameIterator;

        let content = b"SIP/2.0 200 OK\r\nVia: a\r\nContent-Length: 0\r\n\r\n";
        let mut data = Vec::new();
        let header = format!(
            "recv {} bytes from tcp/1.1.1.1:5060 at 00:00:00.000000:\n",
            content.len()
        );
        data.extend_from_slice(header.as_bytes());
        data.extend_from_slice(b"SIP/2.0 200 OK\r\nVia: a\r\n--\nContent-Length: 0\r\n\r\n");
        data.extend_from_slice(b"\x0B\n");

        let filtered = GrepFilter::new(&data[..]);
        let frames: Vec<_> = FrameIterator::new(filtered)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        assert_eq!(frames.len(), 1);
        assert_eq!(frames[0].content, content);
    }
}