Skip to main content

sif_parser/
reader.rs

1// SIF Core v1 — Streaming event-based reader.
2//
3// Reads a SIF document line-by-line and emits `Event` values.
4// Maintains internal state for the active schema so records are
5// parsed against the correct type definitions.
6//
7// Reference: SIF-SPEC.md §5 (Document Structure), §25.2 (For Parsers).
8
9use std::io::BufRead;
10
11use crate::error::{err, ErrorKind, Result};
12use crate::parse::parse_schema_str;
13use crate::types::*;
14
15/// A streaming, event-based SIF reader.
16///
17/// Reads lines from any `BufRead` source and yields `Event` values
18/// one at a time via `next_event()`.
19///
20/// ```ignore
21/// use sif_parser::{Reader, Event};
22/// let mut reader = Reader::from_str(sif_text);
23/// while let Some(event) = reader.next_event()? {
24///     match event {
25///         Event::Record(r) => { /* process record */ }
26///         _ => {}
27///     }
28/// }
29/// ```
30pub struct Reader<R: BufRead> {
31    source: R,
32    line_buf: String,
33    line_num: usize,
34    schema: Option<Schema>,
35    state: ReaderState,
36    header_read: bool,
37}
38
/// Line-handling mode of the reader.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ReaderState {
    /// Ordinary document content: directives, schemas, records.
    Normal,
    /// Between a `#block` line and its closing `#/block`.
    InBlock,
    /// Between a `#template` line and its closing `#/template`.
    InTemplate,
}
45
46impl<R: BufRead> Reader<R> {
47    /// Create a new reader from a buffered source.
48    pub fn new(source: R) -> Self {
49        Self {
50            source,
51            line_buf: String::new(),
52            line_num: 0,
53            schema: None,
54            state: ReaderState::Normal,
55            header_read: false,
56        }
57    }
58
59    /// Returns the current active schema, if any.
60    pub fn schema(&self) -> Option<&Schema> {
61        self.schema.as_ref()
62    }
63
64    /// Returns the current line number (1-based).
65    pub fn line_num(&self) -> usize {
66        self.line_num
67    }
68
69    /// Read the next event from the stream.
70    ///
71    /// Returns `Ok(None)` at end of input.
72    pub fn next_event(&mut self) -> Result<Option<Event>> {
73        loop {
74            self.line_buf.clear();
75            let bytes_read = self
76                .source
77                .read_line(&mut self.line_buf)
78                .map_err(|e| err(ErrorKind::UnexpectedEof, self.line_num, e.to_string()))?;
79
80            if bytes_read == 0 {
81                return Ok(None);
82            }
83
84            self.line_num += 1;
85            let line = self.line_buf.trim_end_matches('\n').trim_end_matches('\r');
86
87            // Handle BOM on first line
88            let line = if self.line_num == 1 {
89                line.strip_prefix('\u{FEFF}').unwrap_or(line)
90            } else {
91                line
92            };
93
94            // Inside a block — emit lines until #/block
95            if self.state == ReaderState::InBlock {
96                if line.trim_end() == "#/block" {
97                    self.state = ReaderState::Normal;
98                    return Ok(Some(Event::BlockEnd));
99                }
100                return Ok(Some(Event::BlockLine(line.to_string())));
101            }
102
103            // Inside a template — emit lines until #/template
104            if self.state == ReaderState::InTemplate {
105                if line.trim_end() == "#/template" {
106                    self.state = ReaderState::Normal;
107                    return Ok(Some(Event::TemplateEnd));
108                }
109                return Ok(Some(Event::TemplateLine(line.to_string())));
110            }
111
112            // Skip empty lines
113            if line.trim().is_empty() {
114                continue;
115            }
116
117            // Header
118            if !self.header_read {
119                self.header_read = true;
120                let header = crate::parse::parse_header_public(line, self.line_num)?;
121                return Ok(Some(Event::Header(header)));
122            }
123
124            // Skip #! after header (§5.3)
125            if line.starts_with("#!") {
126                continue;
127            }
128
129            // Section break
130            if line.trim_end() == "---" {
131                self.schema = None;
132                return Ok(Some(Event::SectionBreak));
133            }
134
135            // Section identifier
136            if line.starts_with('§') {
137                let id = &line['§'.len_utf8()..];
138                return Ok(Some(Event::SectionId(id.trim_end().to_string())));
139            }
140
141            // Block start
142            if line.starts_with("#block ") {
143                let rest = &line[7..];
144                let tokens: Vec<&str> = rest.split_whitespace().collect();
145                if tokens.is_empty() {
146                    return Err(err(ErrorKind::InvalidBlock, self.line_num, "missing block type"));
147                }
148                let block_type = match tokens[0] {
149                    "code" => BlockType::Code,
150                    "text" => BlockType::Text,
151                    "diff" => BlockType::Diff,
152                    "raw" => BlockType::Raw,
153                    "template" => BlockType::Template,
154                    other => {
155                        return Err(err(
156                            ErrorKind::InvalidBlock,
157                            self.line_num,
158                            format!("unknown block type: {}", other),
159                        ));
160                    }
161                };
162                let mut attrs = Vec::new();
163                for &token in &tokens[1..] {
164                    if let Some(eq) = token.find('=') {
165                        attrs.push((token[..eq].to_string(), token[eq + 1..].to_string()));
166                    }
167                }
168                self.state = ReaderState::InBlock;
169                return Ok(Some(Event::BlockStart {
170                    block_type,
171                    attributes: attrs,
172                }));
173            }
174
175            // Template start
176            if line.starts_with("#template ") {
177                let name = line[10..].trim().to_string();
178                self.state = ReaderState::InTemplate;
179                return Ok(Some(Event::TemplateStart(name)));
180            }
181
182            // Schema
183            if line.starts_with("#schema ") {
184                let schema_body = &line[8..];
185                let schema = parse_schema_str(schema_body, self.line_num)?;
186                self.schema = Some(schema.clone());
187                return Ok(Some(Event::Schema(schema)));
188            }
189
190            // Recall — emit as directive but don't change state
191            if line.trim_end() == "#recall schema" {
192                return Ok(Some(Event::Directive(Directive::Recall)));
193            }
194
195            // Other directives
196            if line.starts_with('#') {
197                if let Some(directive) = crate::parse::parse_directive_public(line, self.line_num)?
198                {
199                    return Ok(Some(Event::Directive(directive)));
200                }
201                continue;
202            }
203
204            // Record — parse against active schema
205            if let Some(ref schema) = self.schema {
206                let record =
207                    crate::parse::parse_record_public(line, schema, self.line_num)?;
208                return Ok(Some(Event::Record(record)));
209            } else {
210                return Err(err(
211                    ErrorKind::RecordWithoutSchema,
212                    self.line_num,
213                    "record found before any #schema in this section",
214                ));
215            }
216        }
217    }
218
219    /// Collect all events into a Vec (convenience for testing/small docs).
220    pub fn collect_events(&mut self) -> Result<Vec<Event>> {
221        let mut events = Vec::new();
222        while let Some(event) = self.next_event()? {
223            events.push(event);
224        }
225        Ok(events)
226    }
227}
228
229/// Create a reader from a string.
230impl Reader<std::io::BufReader<std::io::Cursor<String>>> {
231    pub fn from_str(input: &str) -> Self {
232        let cursor = std::io::Cursor::new(input.to_string());
233        Self::new(std::io::BufReader::new(cursor))
234    }
235}
236
237/// Create a reader from stdin.
238pub fn reader_stdin() -> Reader<std::io::BufReader<std::io::Stdin>> {
239    Reader::new(std::io::BufReader::new(std::io::stdin()))
240}
241
#[cfg(test)]
mod tests {
    use super::*;

    /// Run a reader over `text` to completion, panicking on any error.
    fn events_of(text: &str) -> Vec<Event> {
        Reader::from_str(text).collect_events().unwrap()
    }

    /// Header, directive, schema and two records come out in order.
    #[test]
    fn test_streaming_reader() {
        let events = events_of(
            "\
#!sif v1
#context Test
#schema id:uint name:str
1\talice
2\tbob
",
        );

        // The slice pattern pins both the exact count and each event's kind.
        assert!(matches!(
            events.as_slice(),
            [
                Event::Header(_),
                Event::Directive(Directive::Context(_)),
                Event::Schema(_),
                Event::Record(_),
                Event::Record(_),
            ]
        ));
    }

    /// Two sections separated by a single `---` break.
    #[test]
    fn test_streaming_sections() {
        let events = events_of(
            "\
#!sif v1
§first
#schema a:str
hello
---
§second
#schema b:uint
42
",
        );

        let mut section_ids = Vec::new();
        let mut breaks = 0;
        for event in &events {
            match event {
                Event::SectionId(id) => section_ids.push(id.as_str()),
                Event::SectionBreak => breaks += 1,
                _ => {}
            }
        }

        assert_eq!(section_ids, vec!["first", "second"]);
        assert_eq!(breaks, 1);
    }

    /// A code block yields start, one content line, then end.
    #[test]
    fn test_streaming_blocks() {
        let events = events_of(
            "\
#!sif v1
#block code language=rust
fn main() {}
#/block
",
        );

        // events[0] is the header; the block occupies the next three slots.
        assert!(matches!(
            &events[1..4],
            [
                Event::BlockStart {
                    block_type: BlockType::Code,
                    ..
                },
                Event::BlockLine(_),
                Event::BlockEnd,
            ]
        ));
    }
}