xml_log_scan/
lib.rs

1#![doc = include_str!("../README.md")]
2#![warn(
3    missing_docs,
4    missing_debug_implementations,
5    missing_copy_implementations,
6    trivial_casts,
7    trivial_numeric_casts,
8    unused_extern_crates,
9    unused_import_braces,
10    unused_qualifications,
11    variant_size_differences
12)]
13
14use std::io::{BufRead, Cursor, Read};
15
16use quick_xml::{Reader, Writer, events::Event};
17use regex::bytes::Regex;
18
19/// Filter and print XMLs.
20pub fn filter_xmls(mut input: impl BufRead, xpath: Option<&str>) {
21    let log_entry_date_regex =
22        Regex::new(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}").expect("valid regex");
23    let log_entry_non_ws_regex = Regex::new(r"^\S").expect("valid regex");
24
25    let mut buffer = [0u8; 1024];
26
27    let mut first_entry = Vec::new();
28
29    while let Ok(count) = input.read(&mut buffer) {
30        if count == 0 {
31            break;
32        }
33        first_entry.extend_from_slice(&buffer[..count]);
34        if first_entry.len() > 19 {
35            break;
36        }
37    }
38
39    let log_entry_regex = if log_entry_date_regex.is_match(&first_entry) {
40        Some(Regex::new(r"\n\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}").expect("valid regex"))
41    } else if log_entry_non_ws_regex.is_match(&first_entry) {
42        Some(Regex::new(r"\n\S").expect("valid regex"))
43    } else {
44        None
45    };
46
47    let mut head = first_entry.as_slice();
48
49    loop {
50        while let Some(pos) = head.iter().position(|&n| n == b'<') {
51            head = &head[pos..];
52
53            let cursor = Cursor::new(head);
54
55            let xml_candidate = cursor.chain(input);
56
57            let mut reader = Reader::from_reader(xml_candidate);
58
59            let mut buf = Vec::new();
60
61            let events = match reader.read_event_into(&mut buf) {
62                Ok(Event::Start(ref b)) => {
63                    let (start, end) = (b.clone().into_owned(), b.to_end().into_owned());
64
65                    let end = end.name();
66
67                    let mut depth = 0;
68                    let mut events = vec![Event::Start(start)];
69
70                    loop {
71                        let evt = reader.read_event_into(&mut buf);
72
73                        if let Ok(e) = &evt {
74                            events.push(e.clone().into_owned());
75                        }
76
77                        match evt {
78                            Ok(Event::Start(ref e)) if e.name() == end => depth += 1,
79                            Ok(Event::End(ref e)) if e.name() == end => {
80                                if depth == 0 {
81                                    break Ok(events);
82                                }
83                                depth -= 1;
84                            }
85                            Ok(Event::Text(e)) => {
86                                if let Some(log_entry_regex) = log_entry_regex.as_ref() {
87                                    if log_entry_regex.is_match(&e) {
88                                        break Err(());
89                                    }
90                                }
91                            }
92                            Ok(Event::Eof) | Err(_) => break Err(()),
93                            _ => (),
94                        }
95                    }
96                }
97                Ok(e @ Event::Empty(_)) => Ok(vec![e.clone().into_owned()]),
98                _ => Err(()),
99            };
100
101            if let Ok(events) = events {
102                let mut writer = Writer::new(Cursor::new(Vec::new()));
103                for event in events {
104                    writer.write_event(event).expect("write event");
105                }
106                let buf = writer.into_inner().into_inner();
107                let xml = String::from_utf8_lossy(&buf);
108                if let Some(xpath) = xpath {
109                    use amxml::dom::*;
110                    let doc = new_document(&xml).expect("well formed XML");
111                    let root = doc.root_element();
112                    let result = root.eval_xpath(xpath).expect("XPath expression");
113
114                    for item in (0..result.len()).filter_map(|i| result.get_item(i).as_nodeptr()) {
115                        println!("{}", item.to_string());
116                    }
117                } else {
118                    println!("{xml}");
119                }
120            }
121
122            let (cursor, remaining_input) = reader.into_inner().into_inner();
123
124            input = remaining_input;
125
126            if (cursor.position() as usize) < head.len() {
127                head = &head[cursor.position() as usize..];
128            } else {
129                break;
130            }
131        }
132
133        if let Ok(count) = input.read(&mut buffer) {
134            if count == 0 {
135                break;
136            }
137            head = &buffer[..count];
138        } else {
139            break;
140        }
141    }
142}
143
#[cfg(test)]
mod tests {
    /// Placeholder sanity check that only confirms the test harness runs;
    /// it does not exercise any item of this crate.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}