1#![doc = include_str!("../README.md")]
2#![warn(
3 missing_docs,
4 missing_debug_implementations,
5 missing_copy_implementations,
6 trivial_casts,
7 trivial_numeric_casts,
8 unused_extern_crates,
9 unused_import_braces,
10 unused_qualifications,
11 variant_size_differences
12)]
13
14use std::io::{BufRead, Cursor, Read};
15
16use quick_xml::{Reader, Writer, events::Event};
17use regex::bytes::Regex;
18
19pub fn filter_xmls(mut input: impl BufRead, xpath: Option<&str>) {
21 let log_entry_date_regex =
22 Regex::new(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}").expect("valid regex");
23 let log_entry_non_ws_regex = Regex::new(r"^\S").expect("valid regex");
24
25 let mut buffer = [0u8; 1024];
26
27 let mut first_entry = Vec::new();
28
29 while let Ok(count) = input.read(&mut buffer) {
30 if count == 0 {
31 break;
32 }
33 first_entry.extend_from_slice(&buffer[..count]);
34 if first_entry.len() > 19 {
35 break;
36 }
37 }
38
39 let log_entry_regex = if log_entry_date_regex.is_match(&first_entry) {
40 Some(Regex::new(r"\n\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}").expect("valid regex"))
41 } else if log_entry_non_ws_regex.is_match(&first_entry) {
42 Some(Regex::new(r"\n\S").expect("valid regex"))
43 } else {
44 None
45 };
46
47 let mut head = first_entry.as_slice();
48
49 loop {
50 while let Some(pos) = head.iter().position(|&n| n == b'<') {
51 head = &head[pos..];
52
53 let cursor = Cursor::new(head);
54
55 let xml_candidate = cursor.chain(input);
56
57 let mut reader = Reader::from_reader(xml_candidate);
58
59 let mut buf = Vec::new();
60
61 let events = match reader.read_event_into(&mut buf) {
62 Ok(Event::Start(ref b)) => {
63 let (start, end) = (b.clone().into_owned(), b.to_end().into_owned());
64
65 let end = end.name();
66
67 let mut depth = 0;
68 let mut events = vec![Event::Start(start)];
69
70 loop {
71 let evt = reader.read_event_into(&mut buf);
72
73 if let Ok(e) = &evt {
74 events.push(e.clone().into_owned());
75 }
76
77 match evt {
78 Ok(Event::Start(ref e)) if e.name() == end => depth += 1,
79 Ok(Event::End(ref e)) if e.name() == end => {
80 if depth == 0 {
81 break Ok(events);
82 }
83 depth -= 1;
84 }
85 Ok(Event::Text(e)) => {
86 if let Some(log_entry_regex) = log_entry_regex.as_ref() {
87 if log_entry_regex.is_match(&e) {
88 break Err(());
89 }
90 }
91 }
92 Ok(Event::Eof) | Err(_) => break Err(()),
93 _ => (),
94 }
95 }
96 }
97 Ok(e @ Event::Empty(_)) => Ok(vec![e.clone().into_owned()]),
98 _ => Err(()),
99 };
100
101 if let Ok(events) = events {
102 let mut writer = Writer::new(Cursor::new(Vec::new()));
103 for event in events {
104 writer.write_event(event).expect("write event");
105 }
106 let buf = writer.into_inner().into_inner();
107 let xml = String::from_utf8_lossy(&buf);
108 if let Some(xpath) = xpath {
109 use amxml::dom::*;
110 let doc = new_document(&xml).expect("well formed XML");
111 let root = doc.root_element();
112 let result = root.eval_xpath(xpath).expect("XPath expression");
113
114 for item in (0..result.len()).filter_map(|i| result.get_item(i).as_nodeptr()) {
115 println!("{}", item.to_string());
116 }
117 } else {
118 println!("{xml}");
119 }
120 }
121
122 let (cursor, remaining_input) = reader.into_inner().into_inner();
123
124 input = remaining_input;
125
126 if (cursor.position() as usize) < head.len() {
127 head = &head[cursor.position() as usize..];
128 } else {
129 break;
130 }
131 }
132
133 if let Ok(count) = input.read(&mut buffer) {
134 if count == 0 {
135 break;
136 }
137 head = &buffer[..count];
138 } else {
139 break;
140 }
141 }
142}
143
#[cfg(test)]
mod tests {
    /// Placeholder smoke test confirming that the test harness runs.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}