1use std::io::BufRead;
10
11use crate::error::{err, ErrorKind, Result};
12use crate::parse::parse_schema_str;
13use crate::types::*;
14
15pub struct Reader<R: BufRead> {
31 source: R,
32 line_buf: String,
33 line_num: usize,
34 schema: Option<Schema>,
35 state: ReaderState,
36 header_read: bool,
37}
38
/// Parsing mode of the reader: decides how the next input line is interpreted.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ReaderState {
    /// Top-level parsing: headers, directives, schemas and records.
    Normal,
    /// Inside a `#block` body; lines are passed through until `#/block`.
    InBlock,
    /// Inside a `#template` body; lines are passed through until `#/template`.
    InTemplate,
}
45
46impl<R: BufRead> Reader<R> {
47 pub fn new(source: R) -> Self {
49 Self {
50 source,
51 line_buf: String::new(),
52 line_num: 0,
53 schema: None,
54 state: ReaderState::Normal,
55 header_read: false,
56 }
57 }
58
59 pub fn schema(&self) -> Option<&Schema> {
61 self.schema.as_ref()
62 }
63
64 pub fn line_num(&self) -> usize {
66 self.line_num
67 }
68
69 pub fn next_event(&mut self) -> Result<Option<Event>> {
73 loop {
74 self.line_buf.clear();
75 let bytes_read = self
76 .source
77 .read_line(&mut self.line_buf)
78 .map_err(|e| err(ErrorKind::UnexpectedEof, self.line_num, e.to_string()))?;
79
80 if bytes_read == 0 {
81 return Ok(None);
82 }
83
84 self.line_num += 1;
85 let line = self.line_buf.trim_end_matches('\n').trim_end_matches('\r');
86
87 let line = if self.line_num == 1 {
89 line.strip_prefix('\u{FEFF}').unwrap_or(line)
90 } else {
91 line
92 };
93
94 if self.state == ReaderState::InBlock {
96 if line.trim_end() == "#/block" {
97 self.state = ReaderState::Normal;
98 return Ok(Some(Event::BlockEnd));
99 }
100 return Ok(Some(Event::BlockLine(line.to_string())));
101 }
102
103 if self.state == ReaderState::InTemplate {
105 if line.trim_end() == "#/template" {
106 self.state = ReaderState::Normal;
107 return Ok(Some(Event::TemplateEnd));
108 }
109 return Ok(Some(Event::TemplateLine(line.to_string())));
110 }
111
112 if line.trim().is_empty() {
114 continue;
115 }
116
117 if !self.header_read {
119 self.header_read = true;
120 let header = crate::parse::parse_header_public(line, self.line_num)?;
121 return Ok(Some(Event::Header(header)));
122 }
123
124 if line.starts_with("#!") {
126 continue;
127 }
128
129 if line.trim_end() == "---" {
131 self.schema = None;
132 return Ok(Some(Event::SectionBreak));
133 }
134
135 if line.starts_with('§') {
137 let id = &line['§'.len_utf8()..];
138 return Ok(Some(Event::SectionId(id.trim_end().to_string())));
139 }
140
141 if line.starts_with("#block ") {
143 let rest = &line[7..];
144 let tokens: Vec<&str> = rest.split_whitespace().collect();
145 if tokens.is_empty() {
146 return Err(err(ErrorKind::InvalidBlock, self.line_num, "missing block type"));
147 }
148 let block_type = match tokens[0] {
149 "code" => BlockType::Code,
150 "text" => BlockType::Text,
151 "diff" => BlockType::Diff,
152 "raw" => BlockType::Raw,
153 "template" => BlockType::Template,
154 other => {
155 return Err(err(
156 ErrorKind::InvalidBlock,
157 self.line_num,
158 format!("unknown block type: {}", other),
159 ));
160 }
161 };
162 let mut attrs = Vec::new();
163 for &token in &tokens[1..] {
164 if let Some(eq) = token.find('=') {
165 attrs.push((token[..eq].to_string(), token[eq + 1..].to_string()));
166 }
167 }
168 self.state = ReaderState::InBlock;
169 return Ok(Some(Event::BlockStart {
170 block_type,
171 attributes: attrs,
172 }));
173 }
174
175 if line.starts_with("#template ") {
177 let name = line[10..].trim().to_string();
178 self.state = ReaderState::InTemplate;
179 return Ok(Some(Event::TemplateStart(name)));
180 }
181
182 if line.starts_with("#schema ") {
184 let schema_body = &line[8..];
185 let schema = parse_schema_str(schema_body, self.line_num)?;
186 self.schema = Some(schema.clone());
187 return Ok(Some(Event::Schema(schema)));
188 }
189
190 if line.trim_end() == "#recall schema" {
192 return Ok(Some(Event::Directive(Directive::Recall)));
193 }
194
195 if line.starts_with('#') {
197 if let Some(directive) = crate::parse::parse_directive_public(line, self.line_num)?
198 {
199 return Ok(Some(Event::Directive(directive)));
200 }
201 continue;
202 }
203
204 if let Some(ref schema) = self.schema {
206 let record =
207 crate::parse::parse_record_public(line, schema, self.line_num)?;
208 return Ok(Some(Event::Record(record)));
209 } else {
210 return Err(err(
211 ErrorKind::RecordWithoutSchema,
212 self.line_num,
213 "record found before any #schema in this section",
214 ));
215 }
216 }
217 }
218
219 pub fn collect_events(&mut self) -> Result<Vec<Event>> {
221 let mut events = Vec::new();
222 while let Some(event) = self.next_event()? {
223 events.push(event);
224 }
225 Ok(events)
226 }
227}
228
229impl Reader<std::io::BufReader<std::io::Cursor<String>>> {
231 pub fn from_str(input: &str) -> Self {
232 let cursor = std::io::Cursor::new(input.to_string());
233 Self::new(std::io::BufReader::new(cursor))
234 }
235}
236
237pub fn reader_stdin() -> Reader<std::io::BufReader<std::io::Stdin>> {
239 Reader::new(std::io::BufReader::new(std::io::stdin()))
240}
241
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a reader over `input` until exhaustion, panicking on any error.
    fn read_all(input: &str) -> Vec<Event> {
        Reader::from_str(input).collect_events().unwrap()
    }

    /// Header, directive, schema and two records come out in input order.
    #[test]
    fn test_streaming_reader() {
        let input = "\
#!sif v1
#context Test
#schema id:uint name:str
1\talice
2\tbob
";
        let events = read_all(input);

        // The slice pattern pins both the length (5) and each variant.
        assert!(matches!(
            events.as_slice(),
            [
                Event::Header(_),
                Event::Directive(Directive::Context(_)),
                Event::Schema(_),
                Event::Record(_),
                Event::Record(_),
            ]
        ));
    }

    /// Section ids are reported in order and `---` yields one section break.
    #[test]
    fn test_streaming_sections() {
        let input = "\
#!sif v1
§first
#schema a:str
hello
---
§second
#schema b:uint
42
";
        let events = read_all(input);

        let mut section_ids = Vec::new();
        let mut breaks = 0;
        for event in &events {
            match event {
                Event::SectionId(id) => section_ids.push(id.as_str()),
                Event::SectionBreak => breaks += 1,
                _ => {}
            }
        }

        assert_eq!(section_ids, ["first", "second"]);
        assert_eq!(breaks, 1);
    }

    /// A code block produces start, line and end events after the header.
    #[test]
    fn test_streaming_blocks() {
        let input = "\
#!sif v1
#block code language=rust
fn main() {}
#/block
";
        let events = read_all(input);

        // Only positions 1..=3 are checked; whatever precedes or follows
        // them is left unconstrained.
        assert!(matches!(
            events.as_slice(),
            [
                _,
                Event::BlockStart {
                    block_type: BlockType::Code,
                    ..
                },
                Event::BlockLine(_),
                Event::BlockEnd,
                ..
            ]
        ));
    }
}